////////////////////////////////////////////////////////////////////////////////
// MP4 DEMUXER
////////////////////////////////////////////////////////////////////////////////

#include "session.h"
#include <stdint.h>
#include <asm/delay.h>
#include <uapi/time.h>
#include <uapi/stdlib.h>
#include <uapi/string.h>
#ifdef CONFIG_MEDIA_EMULATE_ON_PC
#include "ff/ff.h"
#define BLOCK	CV_BITBUFS_SIZE
#else
#include "ff.h"
#endif
///////////////////////////////////// define ///////////////////////////////////
/* Pack four characters into one 32-bit tag, 'a' in the lowest byte and 'd' in
 * the highest; this is the value obtained when the four tag bytes of a box are
 * read unswapped into a uint32_t on a little-endian CPU. */
#define MKTAG(a, b, c, d) (((unsigned)(d) << 24) | ((c) << 16) | ((b) << 8) | (a))
/* Payload bytes of one index cache: a multiple of 8 and 12, at least 24 */
#define MP4_INDEX_LEN (1024 * 48)
/* One index cache plus 4 spare bytes for the f_lseek and f_read bug fix */
#define MP4_INDEX_BUFLEN (4 + MP4_INDEX_LEN)
/* Six index caches per track */
#define MP4_TRACK_INDEX_BUFLEN (6 * MP4_INDEX_BUFLEN)
/* Payload bytes of one packet cache (a 720p packet is 20K ~ 150K long) */
#define MP4_AVDATA_LEN (1024 * 32)
/* Whole read buffer: the index caches of two tracks followed by two packet
 * caches with 4 spare bytes each; must stay below 2M */
#define MP4_BUFLEN (2 * MP4_TRACK_INDEX_BUFLEN + 2 * (4 + MP4_AVDATA_LEN))

/* Track kinds */
#define MP4_TYPE_VIDEO 1
#define MP4_TYPE_AUDIO 2

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
/*
 * read_buf_4bytes_align(buf, len)
 *
 * Read `len` bytes from punit->file into `buf` while keeping the bulk read
 * 4-byte aligned (file position, destination address and length).
 * NOTE(review): the underlying file-system/driver problem is not visible in
 * this file - confirm what exactly requires the alignment.
 *
 * Usage notes:
 *  - `punit` is taken from the scope of the caller, it is not a parameter.
 *  - `buf` and `len` are expanded several times; pass expressions without
 *    side effects.
 *  - `buf` needs spare bytes behind `len` (4 according to the original
 *    note), because the aligned read lands at the next 4-byte aligned
 *    address inside `buf`.
 *  - The results of f_read()/f_lseek() are not checked.
 *  - The expansion already ends with ';', so the macro must not be used as
 *    the un-braced body of an if that has an else branch.
 *
 * When the file position, the buffer address or the length is not a multiple
 * of 4 and len > 4:
 *  1. seek back to the previous 4-byte aligned file position,
 *  2. read (len - len % 4) bytes to the next 4-byte aligned address in buf,
 *  3. move the data down byte by byte so the requested bytes start at buf,
 *  4. read the remaining (len % 4 + position % 4) bytes directly behind them.
 * In every other case a single plain f_read() is issued.
 */
#define read_buf_4bytes_align(buf, len) do { \
		unsigned int _addr = f_tell(&punit->file); \
		unsigned int _addr_remain = _addr % 4; \
		unsigned int _buf_remain = (unsigned int)(buf) % 4; \
		unsigned int _len_remain = (len) % 4; \
		unsigned char *_src; \
		unsigned char *_dst; \
		int _i; \
		if ((_buf_remain || _addr_remain || _len_remain) && (len) > 4) { \
			if (_addr_remain) \
				f_lseek(&punit->file, _addr - _addr_remain); \
			f_read(&punit->file, (buf) + (_buf_remain ? 4 - _buf_remain : 0), (len) - _len_remain, NULL); \
			if (_addr_remain || _buf_remain) { \
				_src = (buf) + (_buf_remain ? 4 - _buf_remain : 0) + _addr_remain; \
				_dst = (buf); \
				for (_i = 0; _i < (len) - _len_remain; _i++) \
					*_dst++ = *_src++; \
			} \
			if (_len_remain + _addr_remain) \
				f_read(&punit->file, (buf) + (len) - _len_remain - _addr_remain, _len_remain + _addr_remain, NULL); \
		} else { \
			f_read(&punit->file, (buf), (len), NULL); \
		} \
	} while (0);
#endif /* MEDIA_FS_4BYTES_ALIGN_BUG_FIX */

///////////////////////////////////// struct ///////////////////////////////////
/*
 * Read cursor over the entries of a 'stsc' (sample to chunk) box.
 * The first eight members (total_count .. bufaddr) are declared with the same
 * names, types and order in the other index cursor structs of this file.
 */
typedef struct {
	int total_count; 		// total number of entries
	int current_count; 		// number of entries already used
	uint64_t origin_offset; // offset of the box data in the mp4 file
	uint64_t current_offset;// offset of the current entry in the mp4 file
	int unit_size; 			// size of one entry
	int cache_count;		// number of entries held in the cache
	int cache_current;		// index in use inside the entry cache
	uint8_t *bufaddr;		// the entry cache buffer

	uint32_t first_chunk;		// 'first chunk' of the 'sample to chunk' box
	uint32_t samples_per_chunk;	// 'samples per chunk' of the box
	uint32_t next_first_chunk;	// 'first chunk' of the following entry
} sample_to_chunk_t; // stsc box

/*
 * Read cursor over a 'stco' or 'co64' (chunk offset) box.
 * The first eight members have the same names, types and order as in
 * sample_to_chunk_t.
 */
typedef struct {
	int total_count;
	int current_count;
	uint64_t origin_offset;
	uint64_t current_offset;
	int unit_size;
	int cache_count;
	int cache_current;
	uint8_t *bufaddr;

	uint64_t chunk_offset;	// 'chunk offset' value of the 'chunk offset' box
	uint32_t cur_sample_in_chunk; // NOTE(review): presumably the sample position inside the current chunk - confirm
} chunk_offset_t; // stco or co64 box

/*
 * Read cursor over a 'stsz' (sample size) box.
 * The first eight members have the same names, types and order as in
 * sample_to_chunk_t.
 */
typedef struct {
	int total_count;
	int current_count;
	uint64_t origin_offset;
	uint64_t current_offset;
	int unit_size;
	int cache_count;
	int cache_current;
	uint8_t *bufaddr;

	uint32_t sample_size;	// 'sample size' value of the 'sample size' box
} sample_size_t; // stsz box

/*
 * Read cursor over a 'stts' (decode time to sample) box, filled in by
 * mp4_read_stts().
 */
typedef struct {
	int total_count;		// entry count read from the box
	int current_count;		// set to 1 once the first entry has been decoded
	uint64_t origin_offset;	// file position of the first entry
	uint64_t current_offset;// file position right behind the cached entries
	int unit_size;			// bytes per entry (4 + 4)
	int cache_count;		// number of entries loaded into bufaddr
	int cache_current;		// set to 1 once the first cached entry has been decoded
	uint8_t *bufaddr;		// entry cache

	int sample_count;	// 'sample count' of 'decode time to sample' box
	int sample_delta;	// 'sample delta' of 'decode time to sample' box
	int cur_sample_read_num; // starts at 1; NOTE(review): presumably samples consumed from the current entry - confirm
} decode_time_to_sample_t; // stts box

/*
 * Read cursor over a 'stss' (sync sample / key frame) box, filled in by
 * mp4_read_stss().
 */
typedef struct {
	int total_count;		// entry count read from the box
	int current_count;		// set to 1 once the first entry has been decoded
	uint64_t origin_offset;	// file position of the first entry
	uint64_t current_offset;// file position right behind the cached entries
	int unit_size;			// bytes per entry (4)
	int cache_count;		// number of entries loaded into bufaddr
	int cache_current;		// set to 1 once the first cached entry has been decoded
	uint8_t *bufaddr;		// entry cache

	int sample_number;	// 'sample number' of 'sync sample' box
} sync_sample_t; // stss box

/*
 * Read cursor over a 'ctts' (composition time to sample) box, filled in by
 * mp4_read_ctts().
 */
typedef struct {
	int total_count;		// entry count read from the box
	int current_count;		// set to 1 once the first entry has been decoded
	uint64_t origin_offset;	// file position of the first entry
	uint64_t current_offset;// file position right behind the cached entries
	int unit_size;			// bytes per entry (4 + 4)
	int cache_count;		// number of entries loaded into bufaddr
	int cache_current;		// set to 1 once the first cached entry has been decoded
	uint8_t *bufaddr;		// entry cache

	int sample_count;	// 'sample count' of the decoded entry
	int sample_offset;	// 'sample offset' of the decoded entry
	int cur_sample_read_num; // starts at 1; NOTE(review): presumably samples consumed from the current entry - confirm
	int start_offset; // offset applied to all pts; initialised to the negated 'sample offset' of the first entry
} composition_time_to_sample_t; // ctts box

/* Values kept from the 'mdhd' (media header) box of a track. */
typedef struct {
	uint32_t creation_time;
	uint32_t modification_time;
	uint32_t time_scale; // time units per second; an stts delta divided by time_scale gives seconds per frame
	uint32_t duration;   // media duration in time_scale units
	int language;
} media_header_t; // mdhd box

/* Audio parameters taken from the 'mp4a' sample entry inside 'stsd'. */
typedef struct {
	int sample_rate;   // integer part of the 16.16 sample rate field
	int sample_size;   // 'sample size' field of the sample entry
	int channel_count; // 'channels' field of the sample entry
} audio_config_t; // stsd/mp4a box

/* Values kept from the 'tkhd' (track header) box of a track. */
typedef struct {
	int width;  // integer part from 'tkhd'; replaced by the value of the video sample entry in 'stsd'
	int height; // integer part from 'tkhd'; replaced by the value of the video sample entry in 'stsd'
	int volume; // 'tkhd' volume; 0 is treated as a video track, anything else as audio
} track_header_t; // tkhd box

/* Everything the demuxer keeps for one track (one video or one audio stream). */
typedef struct {
	const char *name;				// stream name, chosen from the 'stsd' sample entry tag
	int track_type; 				// 1: video, 2: audio
	int metadata_len;				// number of valid bytes in metadata[]
	char metadata[4096]; 			// parameter sets (sps/pps/vps) or 'esds' payload taken from the stsd box

	/* mp4 index box */
	sample_to_chunk_t chunk_index; 	// stsc box, chunk id and sample count map
	chunk_offset_t chunk_offset;	// stco or co64, chunk or sample offset
	sample_size_t sample_size; 		// stsz, the video or audio packet size
	sync_sample_t sync_sample;		// stss, key frame
	decode_time_to_sample_t decode_time;			// stts, dts
	composition_time_to_sample_t composition_time;	// ctts, dts to pts

	/* mp4 normal box */
	media_header_t media_header;	// mdhd, time scale, duration
	audio_config_t audio_conf;		// stsd/mp4a
	track_header_t track_header;	// tkhd

	/* mp4 video audio packet cache */
	uint8_t *mdatcache;				// packet data cache, points into the demuxer read buffer
	uint32_t mdataddr;				// file address of the first byte held in mdatcache
	uint32_t mdatendpos;			// file position of the end of the mdat payload
} mp4_track_t;

/* Per-file demuxer context handed to every box parser. */
typedef struct {
	struct SESSION *session;
	struct SESSION *session_video, *session_audio;

	unsigned long load_size;		// file length
	unsigned long load_offset;
	int eof;
	psysbuf_t load_buf, loadbuf_video, loadbuf_audio;
	FIL file;						// the open mp4 file, used by all f_read/f_lseek/f_tell calls

	int current_track_index; // starts from 0, incremented by mp4_read_stsd() for every 'stsd' box
	mp4_track_t video_stream;
	mp4_track_t audio_stream;
	mp4_track_t *current_stream;	// track being parsed, selected by mp4_read_tkhd()
	int (*video_write_packet)(void *, media_bsf_opt_t, void *, phys_addr_t, int32_t *, phys_addr_t, int32_t);
	int (*audio_write_packet)(void *, media_bsf_opt_t, void *, phys_addr_t, int32_t *, phys_addr_t, int32_t);
#ifdef MEDIA_AV_SYNC
	session_av_sync *sync;
	session_av_sync avsync;
#endif
	unsigned char *readbuf;			// base of the read buffer; the packet caches start at MP4_TRACK_INDEX_BUFLEN * 2
	int skip_video_key_pkt;
	int skip_audio_pkt_num;
} session_unit_t;

/* Header of one mp4 box as handed to the box parsers. */
typedef struct {
	uint32_t type; // four character box tag, compared against MKTAG() values
	uint64_t len; // total box length (excluding the length and type fields)
} mp4_box_header_t;

/* One entry of the box dispatch table: box tag and the function parsing it. */
typedef struct {
	uint32_t type; // box tag built with MKTAG()
	int (*parser)(session_unit_t *, mp4_box_header_t); // parser called for that tag
} mp4_parser_table_t;

/* Direction selector. NOTE(review): no user is visible in this part of the file - presumably the seek/step direction, confirm. */
typedef enum {
	FORWARD,
	REWIND
} direction_t;

//////////////////////////////////// variable //////////////////////////////////
/* Box parsers referenced by mp4_parser_table; all share the parser signature of mp4_parser_table_t. */
static int mp4_read_parentbox(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_tkhd(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_mdhd(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_stsd(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_stsc(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_stco(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_co64(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_stsz(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_stts(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_stss(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_ctts(session_unit_t *punit, mp4_box_header_t atom);
static int mp4_read_mdat(session_unit_t *punit, mp4_box_header_t atom);
static int command_flag = 0;

/*
 * Dispatch table: box tag -> parser.  The container boxes are all handled by
 * mp4_read_parentbox; the {0, NULL} entry terminates the table.
 */
mp4_parser_table_t mp4_parser_table[] = {
{ MKTAG('m','o','o','v'), mp4_read_parentbox },
{ MKTAG('t','r','a','k'), mp4_read_parentbox },
{ MKTAG('m','d','i','a'), mp4_read_parentbox },
{ MKTAG('m','i','n','f'), mp4_read_parentbox },
{ MKTAG('d','i','n','f'), mp4_read_parentbox },
{ MKTAG('s','t','b','l'), mp4_read_parentbox },
{ MKTAG('u','d','t','a'), mp4_read_parentbox },
{ MKTAG('t','k','h','d'), mp4_read_tkhd }, /* selects the current stream; volume, width and height */
{ MKTAG('m','d','h','d'), mp4_read_mdhd }, /* time scale (units per second) and duration */
{ MKTAG('s','t','s','d'), mp4_read_stsd }, /* metadata/extradata: sps, pps */
{ MKTAG('s','t','t','s'), mp4_read_stts }, /* dts per packet */
{ MKTAG('s','t','s','s'), mp4_read_stss }, /* sync sample, key frame */
{ MKTAG('s','t','s','z'), mp4_read_stsz }, /* sample size list */
{ MKTAG('s','t','s','c'), mp4_read_stsc }, /* sample to chunk list */
{ MKTAG('s','t','c','o'), mp4_read_stco }, /* chunk or sample offset (4 bytes) list */
{ MKTAG('c','o','6','4'), mp4_read_co64 }, /* chunk or sample offset (8 bytes) list */
{ MKTAG('c','t','t','s'), mp4_read_ctts }, /* dts to pts, not present in every mp4 file */
{ MKTAG('m','d','a','t'), mp4_read_mdat }, /* skip the mdat payload, or stop reading the file once a track is known */
{0, NULL}
};

//////////////////////////////////// function //////////////////////////////////
/*!
 * \brief 'mdat' box: prime the packet caches and step over the payload
 *
 * Expects the file position to be at the first payload byte.  Up to
 * MP4_AVDATA_LEN bytes from the start of the payload are read into the video
 * packet cache and duplicated into the audio packet cache; the file address
 * of the cached data and the end position of the payload are stored in both
 * streams.  Those two fields are 32 bits wide in mp4_track_t.
 *
 * \param punit demuxer context, punit->readbuf must point to the read buffer
 * \param atom  box header, atom.len is used as the payload length
 * \return -1 when at least one track has already been counted by
 *         mp4_read_stsd() (stream info is complete, the rest of the file is
 *         not scanned); otherwise 0 with the file positioned right behind
 *         the payload
 */
static
int mp4_read_mdat(session_unit_t *punit, mp4_box_header_t atom)
{
	/*
	 * Keep the payload length in 64 bits.  Stored in an int, a payload of
	 * 2 GiB or more became negative, was not clamped to MP4_AVDATA_LEN and
	 * reached f_read() as the byte count for a 32K cache.
	 */
	uint64_t total_len = atom.len;
	uint64_t data_start = f_tell(&punit->file);
	unsigned int addr = (unsigned int)data_start;
	int read_len = (total_len > MP4_AVDATA_LEN) ? MP4_AVDATA_LEN : (int)total_len;

	/* the two packet caches follow the index caches of both tracks */
	punit->video_stream.mdatcache = punit->readbuf + MP4_TRACK_INDEX_BUFLEN * 2;
	punit->audio_stream.mdatcache = punit->readbuf + MP4_TRACK_INDEX_BUFLEN * 2 + MP4_AVDATA_LEN + 4;
	punit->video_stream.mdatendpos = (uint32_t)(data_start + total_len);
	punit->audio_stream.mdatendpos = (uint32_t)(data_start + total_len);

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	/* let the cache start at the previous 4-byte aligned file position */
	addr -= addr % 4;
#endif /* MEDIA_FS_4BYTES_ALIGN_BUG_FIX */

	f_lseek(&punit->file, addr);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(punit->video_stream.mdatcache, read_len);
#else
	f_read(&punit->file, punit->video_stream.mdatcache, read_len, NULL);
#endif
	/* both streams start with the same cached bytes */
#ifdef MEDIA_USE_EDMA
	edma_copy(EDMA_CHN_MEDIA, (phys_addr_t)punit->audio_stream.mdatcache,
							(phys_addr_t)punit->video_stream.mdatcache, read_len);
#else
	memcpy(punit->audio_stream.mdatcache, punit->video_stream.mdatcache, read_len);
#endif
	punit->video_stream.mdataddr = addr;
	punit->audio_stream.mdataddr = addr;

	if (punit->current_track_index >= 1) {
		return -1; // exit stream info get, skip all file
	}

	/*
	 * No track described yet: continue with the box behind the payload.
	 * Seeking to the absolute end of the payload does not depend on how many
	 * bytes the cache read above actually consumed.
	 */
	f_lseek(&punit->file, data_start + total_len);

	return 0;
}

/*!
 * \brief 'tkhd' box: select the current stream, read volume, width and height
 *
 * {|size of atom|type     |version|flag|creation time|modification time|track id|reserved|duration|
 * {|4           |4("tkhd")|1      |3   |4 or 8       |4 or 8           |4       |4       |4 or 8  |
 *  |reserved|layer|alternate_group|volume|reserved|matrix|width|height|}
 *  |8       |2    |2              |2     |2       |36    |4    |4     |}
 *
 * A track with volume 0 is treated as the video stream, any other volume as
 * the audio stream; punit->current_stream is switched accordingly and is then
 * used by the parsers of the following boxes.  Width and height are 16.16
 * fixed point, only the integer part is stored (mp4_read_stsd() later stores
 * the values of the video sample entry in the same fields).
 *
 * \param punit demuxer context
 * \param atom  box header, atom.len is the payload length
 * \return always 0; the file is left positioned behind the box
 */
static
int mp4_read_tkhd(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	/* zero the read targets: the f_read() results are not checked */
	uint16_t tmp16 = 0;
	unsigned char version = 0;
	unsigned short volume;

	/*
	 * current_track_index is only incremented by mp4_read_stsd(), which is
	 * reached after the 'tkhd' of the same track.  Once two tracks have been
	 * counted, a further 'tkhd' belongs to an unsupported third track and
	 * must not re-select the stream or overwrite its header values, so the
	 * limit here is ">= 2" (the index boxes behind 'stsd' use "> 2").
	 */
	if (punit->current_track_index >= 2) {
		goto exit;
	}

	f_read(&punit->file, &version, 1, NULL); cur_len += 1;
	/* skip the 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 3); cur_len += 3;
	if (version) { // version == 1
		/* skip the |creation time|modification time|track id|reserved|duration| (8+8+4+4+8) */
		f_lseek(&punit->file, f_tell(&punit->file) + 32); cur_len += 32;
	} else {
		/* same fields with 4 byte times and duration (4+4+4+4+4) */
		f_lseek(&punit->file, f_tell(&punit->file) + 20); cur_len += 20;
	}

	/* skip the |reserved|layer|alternate_group| */
	f_lseek(&punit->file, f_tell(&punit->file) + 12); cur_len += 12;

	f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
	volume = uswap_16(tmp16);
	//debug("\nvolume: 0x%04x\n", volume);
	//  video stream if volume is 0, others audio stream, 0x0100 max volume
	if (volume) {
		punit->current_stream = &punit->audio_stream;
		debug("\n---->current audio stream\n");
	} else {
		punit->current_stream = &punit->video_stream;
		debug("\n====>current video stream\n");
	}
	punit->current_stream->track_header.volume = volume;

	/* skip the |reserved|matrix| */
	f_lseek(&punit->file, f_tell(&punit->file) + 38); cur_len += 38;

	// width: 2 bytes integer part + 2 bytes fraction
	f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
	//!!!: real width and height in stsd/avc1/avcC box
	debug("width: %d, ", uswap_16(tmp16));
	punit->current_stream->track_header.width = uswap_16(tmp16);
	// skip the 2 byte fraction
	f_lseek(&punit->file, f_tell(&punit->file) + 2); cur_len += 2;

	// height: same layout as width
	f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
	debug("height: %d\n", uswap_16(tmp16));
	punit->current_stream->track_header.height = uswap_16(tmp16);
	// skip the 2 byte fraction
	f_lseek(&punit->file, f_tell(&punit->file) + 2); cur_len += 2;

exit:
	/* step over whatever part of the box has not been consumed */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief 'mdhd' box: time scale and duration of the current stream
 *
 * |size of atom|type     |version|flag|creation time|modification time|time scale|duration|language + pre_defined|
 * |4           |4("mdhd")|1      |3   |4 or 8       |4 or 8           |4         |4 or 8  |4                     |
 *
 * In a version 1 box the two times and the duration are 8 bytes wide.  The
 * duration is stored in a 32 bit field; a version 1 duration that does not
 * fit is stored as 0xFFFFFFFF.
 *
 * \param punit demuxer context; punit->current_stream must have been
 *              selected by mp4_read_tkhd()
 * \param atom  box header, atom.len is the payload length
 * \return always 0; the file is left positioned behind the box
 */
static
int mp4_read_mdhd(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	mp4_track_t *stream = punit->current_stream;
	/* zero the read targets: the f_read() results are not checked */
	unsigned char version = 0;
	uint32_t tmp = 0;

	/*
	 * 'mdhd' is parsed before the 'stsd' of its track and only
	 * mp4_read_stsd() increments current_track_index, so a third track shows
	 * up here with index 2.  Skip it, otherwise it would overwrite the time
	 * scale and duration of one of the two supported streams.
	 */
	if (punit->current_track_index >= 2) {
		goto exit;
	}

	f_read(&punit->file, &version, 1, NULL); cur_len += 1;
	/* skip the 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 3); cur_len += 3;

	/* skip the creation and modification time */
	if (version == 1) {
		f_lseek(&punit->file, f_tell(&punit->file) + 16); cur_len += 16;
	} else {
		f_lseek(&punit->file, f_tell(&punit->file) + 8); cur_len += 8;
	}

	/* get time scale */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	stream->media_header.time_scale = uswap_32(tmp);
	//debug("time scale: %d\n", stream->media_header.time_scale);

	/* read duration */
	if (version == 1) {
		uint32_t high;

		/* 64 bit big endian value: upper half first */
		f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
		high = uswap_32(tmp);
		f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
		stream->media_header.duration = high ? 0xFFFFFFFFu : uswap_32(tmp);
	} else {
		f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
		stream->media_header.duration = uswap_32(tmp);
	}
	/* a damaged file may carry time scale 0: do not divide by it */
	debug("duration: %ds\n", stream->media_header.time_scale
			? stream->media_header.duration / stream->media_header.time_scale : 0);

exit:
	/* step over whatever part of the box has not been consumed */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief 'stts' box: dts, time duration per packet
 *
 * |size of atom|type     |version|flag|count of time-to-sample|{|sample count|sample duration|}...
 * |4           |4("stts")|1      |3   |4                      |{|4           |4}...
 *
 * Sets up the decode time cursor of the current stream: records where the
 * entries live in the file, loads as many entries as fit into MP4_INDEX_LEN
 * bytes into the cursor's cache and decodes the first entry.
 *
 * \param punit demuxer context; the cache buffer of the cursor (bufaddr)
 *              must already be assigned
 * \param atom  box header, atom.len is the payload length
 * \return always 0; the file is left positioned behind the box
 */
static
int mp4_read_stts(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	int read_len;
	decode_time_to_sample_t *dt = &punit->current_stream->decode_time;
	uint32_t tmp = 0, stts_count, max_cached;

	/* only the boxes of the first two tracks are used */
	if (punit->current_track_index < 1) {
		debug("stream num:%d less than 1!\n", punit->current_track_index);
		goto exit;
	} else if (punit->current_track_index > 2) {
		goto exit;
	}

	/* skip the 1byte version and 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/* get time-to-sample count */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	stts_count = uswap_32(tmp);
	//debug("stts(sample duration, dts, cts)count: %d\n", stts_count);
	dt->total_count = stts_count;
	dt->origin_offset = f_tell(&punit->file);
	dt->current_offset = dt->origin_offset;
	dt->unit_size = 4 + 4;

	/*
	 * Load the first part of the table.  The limit is applied to the entry
	 * count instead of to count * unit_size: the count comes straight from
	 * the file, and the product could wrap to a negative int that would then
	 * be used as the read length for the cache buffer.
	 */
	max_cached = MP4_INDEX_LEN / dt->unit_size;
	dt->cache_count = (stts_count > max_cached) ? max_cached : stts_count;
	read_len = dt->cache_count * dt->unit_size;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(dt->bufaddr, read_len);
#else
	f_read(&punit->file, dt->bufaddr, read_len, NULL);
#endif
	cur_len += read_len;

	dt->current_offset += (dt->cache_count * dt->unit_size);

	/* decode the first entry; an empty table has none, so do not read stale cache bytes */
	if (dt->cache_count > 0) {
		memcpy(&tmp, dt->bufaddr, 4);
		dt->sample_count = uswap_32(tmp);
		memcpy(&tmp, dt->bufaddr + 4, 4);
		dt->sample_delta = uswap_32(tmp);
	} else {
		dt->sample_count = 0;
		dt->sample_delta = 0;
	}
	//debug("stts sample_delta: %d\n", dt->sample_delta);
	dt->cur_sample_read_num = 1;
	dt->current_count = 1;
	dt->cache_current = 1;

exit:
	/* step over whatever part of the box has not been consumed */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief 'ctts' box parser, match dts to pts
 *
 * |size of atom|type     |version|flag|entry count{|sample count|sample offset|}
 * |4           |4("ctts")|1      |3   |4          {|4           |4            |}
 *
 * Sets up the composition time cursor of the current stream: records where
 * the entries live in the file, loads as many entries as fit into
 * MP4_INDEX_LEN bytes into the cursor's cache and decodes the first entry.
 * start_offset is initialised to the negated sample offset of that entry.
 *
 * \param punit demuxer context; the cache buffer of the cursor (bufaddr)
 *              must already be assigned
 * \param atom  box header, atom.len is the payload length
 * \return always 0; the file is left positioned behind the box
 */
static
int mp4_read_ctts(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	int read_len;
	composition_time_to_sample_t *ct = &punit->current_stream->composition_time;
	uint32_t tmp = 0, ctts_count, max_cached;

	/* only the boxes of the first two tracks are used */
	if (punit->current_track_index < 1) {
		debug("stream num:%d less than 1!\n", punit->current_track_index);
		goto exit;
	} else if (punit->current_track_index > 2) {
		goto exit;
	}

	/* skip the 1byte version and 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/* get composition-time entry count */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	ctts_count = uswap_32(tmp);
	debug("ctts(composition time, dts to pts)count: %d\n", ctts_count);
	ct->total_count = ctts_count;
	ct->origin_offset = f_tell(&punit->file);
	ct->current_offset = ct->origin_offset;
	ct->unit_size = 4 + 4;

	/*
	 * Load the first part of the table.  The limit is applied to the entry
	 * count instead of to count * unit_size: the count comes straight from
	 * the file, and the product could wrap to a negative int that would then
	 * be used as the read length for the cache buffer.
	 */
	max_cached = MP4_INDEX_LEN / ct->unit_size;
	ct->cache_count = (ctts_count > max_cached) ? max_cached : ctts_count;
	read_len = ct->cache_count * ct->unit_size;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(ct->bufaddr, read_len);
#else
	f_read(&punit->file, ct->bufaddr, read_len, NULL);
#endif
	cur_len += read_len;

	ct->current_offset += (ct->cache_count * ct->unit_size);

	/* decode the first entry; an empty table has none, so do not read stale cache bytes */
	if (ct->cache_count > 0) {
		memcpy(&tmp, ct->bufaddr, 4);
		ct->sample_count = uswap_32(tmp);
		memcpy(&tmp, ct->bufaddr + 4, 4);
		ct->sample_offset = uswap_32(tmp);
	} else {
		ct->sample_count = 0;
		ct->sample_offset = 0;
	}
	ct->start_offset = -ct->sample_offset;
	ct->cur_sample_read_num = 1;
	ct->current_count = 1;
	ct->cache_current = 1;

exit:
	/* step over whatever part of the box has not been consumed */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief 'stss' box: key frame (sync sample) numbers
 *
 * |size of atom|type     |version|flag|count of sync-sample|{|sample number|}
 * |4           |4("stss")|1      |3   |4                   |{|4            |}
 *
 * Sets up the sync sample cursor of the current stream: records where the
 * entries live in the file, loads as many entries as fit into MP4_INDEX_LEN
 * bytes into the cursor's cache and decodes the first sample number.
 *
 * \param punit demuxer context; the cache buffer of the cursor (bufaddr)
 *              must already be assigned
 * \param atom  box header, atom.len is the payload length
 * \return always 0; the file is left positioned behind the box
 */
static
int mp4_read_stss(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	int read_len;
	sync_sample_t *ss = &punit->current_stream->sync_sample;
	uint32_t tmp = 0, stss_count, max_cached;

	/* only the boxes of the first two tracks are used */
	if (punit->current_track_index < 1) {
		debug("stream num: %d less than 1!\n", punit->current_track_index);
		goto exit;
	} else if (punit->current_track_index > 2) {
		goto exit;
	}

	/* skip the 1byte version and 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/* get sync-sample count */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	stss_count = uswap_32(tmp);
	//debug("stss(sync sample number)count: %d\n", stss_count);
	ss->total_count = stss_count;
	ss->origin_offset = f_tell(&punit->file);
	ss->current_offset = ss->origin_offset;
	ss->unit_size = 4;

	/*
	 * Load the first part of the table.  The limit is applied to the entry
	 * count instead of to count * unit_size: the count comes straight from
	 * the file, and the product could wrap to a negative int that would then
	 * be used as the read length for the cache buffer.
	 */
	max_cached = MP4_INDEX_LEN / ss->unit_size;
	ss->cache_count = (stss_count > max_cached) ? max_cached : stss_count;
	read_len = ss->cache_count * ss->unit_size;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(ss->bufaddr, read_len);
#else
	f_read(&punit->file, ss->bufaddr, read_len, NULL);
#endif
	cur_len += read_len;

	ss->current_offset += (ss->cache_count * ss->unit_size);

	/* decode the first entry; an empty table has none, so do not read stale cache bytes */
	if (ss->cache_count > 0) {
		memcpy(&tmp, ss->bufaddr, 4);
		ss->sample_number = uswap_32(tmp);
	} else {
		ss->sample_number = 0;
	}
	ss->current_count = 1;
	ss->cache_current = 1;

exit:
	/* step over whatever part of the box has not been consumed */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief get video pps sps data, audio sample rate and size
 *
 * |size of atom|type     |version|flag|count of sample descriptions|
 * |4           |4("stsd")|1      |3   |4                           |
 *  ==== {sample descriptions}: avc1, hvc1, hev1, mp4a, mp4v and so on ====
 * |{|size of sample description1|type                |reserved|index|"data"|} {...}|
 * |{|4                          |4"avc1/hvc1/mp4a..."|6       |2    |n     |} {...}|
 *
 *  ==the {sample descriptions} of avc1 or hvc1 above
 * {VISUAL_SAMPLE_ENTRY_FIELDS {MP4_SAMPLE_ENTRY_FIELDS {|size|type   |} reserved|data_reference_index|}
 * {86                         {16                      {|4   |4"avc1"|} 6       |2                   |}
 * ...pre_defined|reserved|pre_defined|width|height|horiz_res|vert_res|reserved|
 * ...2          |2       |4*3        |2    |2     |4        |4       |4       |
 * ...frames_count|AVCDecoderConfigurationRecord{|compressr_name|bit_depth|pre_defined|}}
 * ...2           |                             {|32            |2        |2          |}}
 * ...{|"avcC"|}{btrt}{m4ds}
 * ...{|n     |}{    }{    }
 *
 *  ==the "avcC" above
 * {|avcC Size|type name|version|AVCProfileIndication|profile_compatibility|AVCLevelIndication|
 * {|4        |4"avcC"  |1      |1                   |1                    |1                 |
 * ...NALU len {|SPS num  |SPS len|SPS data|}{|SPS num|len|data|}{...}}{|count of pps|pps len|pps data|}
 * ...6'b1+2'bx{|3'b1+5'bx|2      |n       |}{|1      |2  |n   |}{...}}{|1           |2      |n       |}
 *
 *  ==the "hvcC" above
 * {|hvcC Size|type name|version|profile_space+tier_flag+profile_idc|compatibility_flag|
 * {|4        |4"hvcC"  |1(0x01)|1(2'b + 1'b + 5'b)                 |4                 |
 * ...constraint_indicator_flags|level_idc|reserved+min_segmentation_idc|reserved+parallelism|
 * ...6                         |1        |2(4'b1 + 12'b)               |1(5'b1 + 2'b)       |
 * ...chromaFormat |luma       |chroma     |avgFrameRate|constantFrameRate+numTemporalLayers+temporalIdNested+lengthSize|
 * ...1(6'b1 + 2'b)|1(5'b1+3'b)|1(5'b1+3'b)|2           |1(2'b + 3'b + 1'b + 2'b)                                       |
 * ...numOfArrays   |{|array_completeness+reserved+NAL_unit_type|numNalus|nalUnitLength|NALU_data|}}
 * ...1(VPS/SPS/PPS)|{|1(1'b + 1'b0 + 6'b)                      |2       |2            |n        |}}
 *
 * @see https://blog.csdn.net/u013752202/article/details/80557459
 * @see https://www.pianshen.com/article/7423331100/
 *
 *  ==the "esds" of mp4v above
 * ...
 * @see https://blog.csdn.net/brooknew/article/details/25959627
 * @see <ISO/IEC 14496-14:2003(E) Part 14: MP4 file format> page8 "5.6 Sample Description Boxes"
 * @see https://blog.csdn.net/badousuan/article/details/79519862

 *  ==the "mp4a" above
 * {|mp4a size|type name|rsv|dataRefIndex|rsv|channels|sample size|preDefined|rsv|sample rate<<16|{esds}}
 * {|4        |4"mp4a"  |6  |2           |8  |2       |2          |2         |2  |4              |{....}}
 *  ==the "esds" above (notice: 18B AudioSpecificConfig or 45~54B esds config will use "esds" tag)
 * {|esds size|type name|version+flag|tag|length_field|es_id|
 * {|4        |4"esds"  |4           |1  |8           |2    |
 * |streamDependenceFlag + URL_Flag + OCRstreamFlag + streamPriority|(if streamDependenceFlag)|
 * |1(1'b                + 1'b      + 1'b           + 5'b)          |(2) dependsOn_ES_ID      |
 * |(if URL_Flag) URLlength URLstring  |(if OCRstreamFlag)|...
 * |(1+URLlength) URLstring[URLlength] |2 OCR_ES_Id       |...
 *
 * @see https://blog.csdn.net/xiaojun111111/article/details/52133248
 * @see https://blog.csdn.net/coreavs163/article/details/8603026
 *
 * skip other "pasp", "btrt", "colr"...
 */
static
int mp4_read_stsd(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	uint32_t count_sample_descriptions;
	uint32_t tmp;
	int32_t first_size, description_size;
	uint32_t tag;
	char *tagstr = (char *)&tag;
	mp4_track_t *stream = punit->current_stream;
	uint16_t tmp16;
	int16_t tag_len, num_nalus, nal_unit_len;
	unsigned char array_num, sps_num, pps_num;
	unsigned char nal_type;
	int i;
	uint32_t start_code = 0x01000000; // start code of NALU

	// track number record
	punit->current_track_index++;

	if (punit->current_track_index > 2) {
		debug("support 2 stream(1 video and 1 audio) only!\n");
		goto exit;
	}

	/* skip 4 bytes of version and flag */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/* read count of sample descriptions */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	count_sample_descriptions = uswap_32(tmp);

	/* read size of the first descriptions */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	description_size = uswap_32(tmp);
	first_size = description_size;
	/* read description tag */
	f_read(&punit->file, &tag, 4, NULL); cur_len += 4;
	debug("TAG: %c%c%c%c, len: %d\n", tagstr[0], tagstr[1], tagstr[2], tagstr[3], description_size);
	if (tag == MKTAG('a','v','c','1')) {
		stream->track_type = 1;
		stream->name = "h264";
	} else if (tag == MKTAG('h','v','c','1') || tag == MKTAG('h','e','v','1')) {
		stream->track_type = 1;
		stream->name = "hevc";
	} else if (tag == MKTAG('m','p','4','v')) {
		stream->track_type = 1;
		stream->name = "mpeg4";
		//TODO: add parse "stsd/mp4a/esds" tag
	} else if (tag == MKTAG('m','p','4','a')) {
		stream->track_type = 2;
		stream->name = "mp3 or aac";
		//!!!: the aac type detail please parse "stsd/mp4a/esds" tag
	} else {
		stream->track_type = 1;
		stream->name = "h264";
	}

	if (count_sample_descriptions < 1) {
		if (stream->track_type == 1)
			punit->video_stream.metadata_len = 0;
		else if (stream->track_type == 2)
			punit->audio_stream.metadata_len = 0;
		else //XXX: used 2 stream only now
			debug("support 2 stream(1 video and 1 audio) only!\n");
		goto exit;
	}

	if ((tag == MKTAG('h','v','c','1')) || (tag == MKTAG('a','v','c','1'))
			|| (tag == MKTAG('h','e','v','1')) || (tag == MKTAG('m','p','4','v'))) {
		/* skip 'avc1' |reserved|data_reference_index|pre_defined|reserved|pre_defined| */
		f_lseek(&punit->file, f_tell(&punit->file) + 24); cur_len += 24;
		// replace the width and height of 'tkhd' box
		f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
		//debug("the avc1/hvc1/hev1/mp4v width: %d\n", uswap_16(tmp16));
		stream->track_header.width = uswap_16(tmp16);
		f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
		//debug("the  avc1/hvc1/hev1/mp4v height: %d\n", uswap_16(tmp16));
		stream->track_header.height = uswap_16(tmp16);
		/* skip 'avc1' |horiz_res|vert_res|reserved|frames_count|compressr_name|bit_depth|pre_defined| */
		f_lseek(&punit->file, f_tell(&punit->file) + 50); cur_len += 50;

		/* read size of the sub tag of hvc1 or avc1 */
		f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
		description_size = uswap_32(tmp);

		/* read the sub tag of hvc1, avc1, hev1, mp4v */
		f_read(&punit->file, &tag, 4, NULL); cur_len += 4;
		//debug("TAG: %c%c%c%c, len: %d\n", tagstr[0], tagstr[1], tagstr[2], tagstr[3], description_size);
		first_size -= 90; // 4+24+2+2+50+4+4

		// skip "pasp", "btrt", "colr" and so on.
		while ((first_size > 8) && (tag != MKTAG('a','v','c','C'))
				&& (tag != MKTAG('h','v','c','C'))
				&& (tag != MKTAG('e','s','d','s'))) {
			description_size -= 8;
			if (description_size <= first_size) {
				f_lseek(&punit->file, f_tell(&punit->file) + description_size); cur_len += description_size;
				first_size -= description_size;
			} else 
				debug("tag size error!\n");

			/* read size of the sub tag of hvc1 or avc1 */
			f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
			description_size = uswap_32(tmp);
			first_size -= 4;

			/* read the sub tag of hvc1 or avc1 */
			f_read(&punit->file, &tag, 4, NULL); cur_len += 4;
			//debug("~TAG: %c%c%c%c, len: %d\n", tagstr[0], tagstr[1], tagstr[2], tagstr[3], description_size);
			first_size -= 4;
		}

		/* get sps private data of avcC */
		if ((tag == MKTAG('a','v','c','C'))) {
			/* skip 5 bytes of version, ProfileIndi, compatibility, LevelIndi and NALU len */
			f_lseek(&punit->file, f_tell(&punit->file) + 5); cur_len += 5;

			/* get 1 byte of SPS num */
			f_read(&punit->file, &sps_num, 1, NULL); cur_len += 1;
			sps_num &= 0x1F;
			//debug("SPS num: %d\n", sps_num);
			for (i = 0; i < sps_num; i++) {
				/* read SPS len */
				f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
				tag_len = uswap_16(tmp16);
				//debug("SPS len: %d\n", tag_len);
				/* read nal data */ //!!!: not to overflow the metadata buffer
				memcpy(stream->metadata + stream->metadata_len, &start_code, 4);
				stream->metadata_len += 4;
				f_read(&punit->file, stream->metadata + stream->metadata_len, tag_len, NULL); cur_len += tag_len;
				stream->metadata_len += tag_len;
			}

			/* get PPS */
			f_read(&punit->file, &pps_num, 1, NULL); cur_len += 1;
			//debug("PPS num: %d\n", pps_num);
			for (i = 0; i < pps_num; i++) {
				/* read PPS len */
				f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
				tag_len = uswap_16(tmp16);
				//debug("PPS len: %d\n", tag_len);
				/* read nal data */ //!!!: not to overflow the metadata buffer
				memcpy(stream->metadata + stream->metadata_len, &start_code, 4);
				stream->metadata_len += 4;
				f_read(&punit->file, stream->metadata + stream->metadata_len, tag_len, NULL); cur_len += tag_len;
				stream->metadata_len += tag_len;
			}
			//debug("metadata len: %d\n", stream->metadata_len);
		}

		/* get pps private data of hvcC */
		if ((tag == MKTAG('h','v','c','C'))) {
			/* skip 22 bytes of "hvcC" header */
			f_lseek(&punit->file, f_tell(&punit->file) + 22); cur_len += 22;
			/* read numOfArrays... */
			f_read(&punit->file, &array_num, 1, NULL); cur_len += 1;
			//debug("NAL array: %d\n", array_num);
			for (i = 0; i < array_num; i++) {
				f_read(&punit->file, &nal_type, 1, NULL); cur_len += 1;
				f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
				num_nalus = uswap_16(tmp16);
				f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
				nal_unit_len = uswap_16(tmp16);
				//debug("NAL type: 0x%x, numNalus: %d, len: %d\n", nal_type & 0x3F, num_nalus, nal_unit_len);
				/* read nal data */ //!!!: not to overflow the metadata buffer
				memcpy(stream->metadata + stream->metadata_len, &start_code, 4);
				stream->metadata_len += 4;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
				read_buf_4bytes_align(stream->metadata + stream->metadata_len, nal_unit_len);
#else
				f_read(&punit->file, stream->metadata + stream->metadata_len, nal_unit_len, NULL);
#endif
				cur_len += nal_unit_len;

				stream->metadata_len += nal_unit_len;
			}
			//debug("metadata len: %d\n", stream->metadata_len);
		}

		/* get private data of mp4v */
		if ((tag == MKTAG('e','s','d','s'))) {
			stream->metadata_len = 0;
			f_read(&punit->file, stream->metadata + stream->metadata_len, description_size, NULL); cur_len += description_size;
			stream->metadata_len += description_size;
		}
	}

	if (tag == MKTAG('m','p','4','a')) {
		/* skip 6 bytes of reserved */
		f_lseek(&punit->file, f_tell(&punit->file) + 6); cur_len += 6;
		/* skip 2 bytes of reference index */
		f_lseek(&punit->file, f_tell(&punit->file) + 2); cur_len += 2;
		/* skip 8 bytes of reserved */
		f_lseek(&punit->file, f_tell(&punit->file) + 8); cur_len += 8;
		f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
		debug("audio channel count: %d, ", uswap_16(tmp16));
		stream->audio_conf.channel_count = uswap_16(tmp16);

		f_read(&punit->file, &tmp16, 2, NULL); cur_len += 2;
		debug("sample size: %d, ", uswap_16(tmp16));
		stream->audio_conf.sample_size =  uswap_16(tmp16);

		/* skip 2 bytes of data pre defined */
		f_lseek(&punit->file, f_tell(&punit->file) + 2); cur_len += 2;
		/* skip 2 bytes of reserved */
		f_lseek(&punit->file, f_tell(&punit->file) + 2); cur_len += 2;
		f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
		debug("sample rate: %d\n", uswap_32(tmp) >> 16);
		stream->audio_conf.sample_rate = uswap_32(tmp) >> 16;

		/* read size of the 'esds' sub tag of 'mp4a' */
		f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
		description_size = uswap_32(tmp);
		/* read the 'esds'sub tag of 'mp4a' */
		f_read(&punit->file, &tag, 4, NULL); cur_len += 4;
		//debug("TAG: %c%c%c%c, len: %d\n", tagstr[0], tagstr[1], tagstr[2], tagstr[3], description_size);

		//!!!: get the audio config detail: acc type and so on
	}

	/* only parse the first sample descriptions, ignore other 'descriptions' if have more*/

exit:
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief parse the sample-to-chunk box ("stsc") of the current track
 *
 * |size of atom|type     |version|flag|count of sample-to-chunk|
 * |4           |4("stsc")|1      |3   |4                       |
 * ...{|First chunk|Samples per chunk|Sample description ID|}{...  }
 * ...{|4          |4                |4                    |}{4*3*n}
 *
 * Records where the table sits in the file, loads the first window of it
 * (at most MP4_INDEX_LEN bytes) into sc->bufaddr and decodes the first entry
 * plus the first chunk number of the following entry.
 * On return the file position is atom.len bytes past where it was on entry.
 *
 * \param[in] punit session unit; punit->current_stream receives the index state
 * \param[in] atom  box header, atom.len is the number of bytes to consume
 * \return    always 0
 */
static
int mp4_read_stsc(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	int read_len;
	uint64_t max_size;
	sample_to_chunk_t *sc = &punit->current_stream->chunk_index;
	uint32_t tmp, stsc_count;

	/* only track index 1 and 2 are parsed, any other box is skipped whole */
	if (punit->current_track_index < 1) {
		debug("stream num:%d less than 1!\n", punit->current_track_index);
		goto exit;
	} else if (punit->current_track_index > 2) {
		goto exit;
	}

	/* skip the 1byte version and 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/* get sample-to-chunk count */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	stsc_count = uswap_32(tmp);
	//debug("stsc(sample to chunk)count: %d\n", stsc_count);
	sc->total_count = stsc_count;
	sc->origin_offset = f_tell(&punit->file);
	sc->current_offset = sc->origin_offset;
	sc->unit_size = 4 * 3;

	/*
	 * read the first data
	 * The entry count comes straight from the file, so the table size is
	 * computed in 64 bits: a 32-bit product could wrap or turn negative and
	 * then slip past the MP4_INDEX_LEN clamp as a huge read length.
	 */
	max_size = (uint64_t)stsc_count * sc->unit_size;
	read_len = (max_size > MP4_INDEX_LEN) ? MP4_INDEX_LEN : (int)max_size;
	sc->cache_count = read_len / sc->unit_size;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(sc->bufaddr, read_len);
#else
	f_read(&punit->file, sc->bufaddr, read_len, NULL);
#endif
	cur_len += read_len;

	/* current_offset now points at the first entry that is not cached */
	sc->current_offset += (sc->cache_count * sc->unit_size);

	/* decode entry 1: first chunk and samples per chunk (big endian) */
	memcpy(&tmp, sc->bufaddr, 4);
	sc->first_chunk = uswap_32(tmp);
	memcpy(&tmp, sc->bufaddr + 4, 4);
	sc->samples_per_chunk = uswap_32(tmp);
	// ignore sample_id
	if (sc->total_count > 1) {
		/* first chunk of entry 2 marks where entry 1 stops applying */
		memcpy(&tmp, sc->bufaddr + 12, 4);
		sc->next_first_chunk = uswap_32(tmp);
	} else
		sc->next_first_chunk = sc->first_chunk;
	sc->current_count = 1;
	sc->cache_current = 1;

exit:
	/* skip whatever part of the box was not consumed above */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief parse the 32-bit chunk offset box ("stco") of the current track
 *
 * |size of atom|type     |version|flag|count of chunk offset|chunk offset|
 * |4           |4("stco")|1      |3   |4                    |4*n         |
 *
 * Records where the table sits in the file, loads the first window of it
 * (at most MP4_INDEX_LEN bytes) into co->bufaddr and decodes the first offset.
 * On return the file position is atom.len bytes past where it was on entry.
 *
 * \param[in] punit session unit; punit->current_stream receives the index state
 * \param[in] atom  box header, atom.len is the number of bytes to consume
 * \return    always 0
 */
static
int mp4_read_stco(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	int read_len;
	uint64_t max_size;
	chunk_offset_t *co = &punit->current_stream->chunk_offset;
	uint32_t tmp, stco_count;

	/* only track index 1 and 2 are parsed, any other box is skipped whole */
	if (punit->current_track_index < 1) {
		debug("stream num:%d less than 1!\n", punit->current_track_index);
		goto exit;
	} else if (punit->current_track_index > 2) {
		//debug("support 2 stream(1 video and 1 audio) only!\n");
		goto exit;
	}

	/* skip the 1byte version and 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/* get chunk offset count */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	stco_count = uswap_32(tmp);
	//debug("stco(chunk offset)count: %d\n", stco_count);
	co->total_count = stco_count;
	co->origin_offset = f_tell(&punit->file);
	co->current_offset = co->origin_offset;
	co->unit_size = 4;

	/*
	 * read the first data
	 * The entry count comes straight from the file, so the table size is
	 * computed in 64 bits: a 32-bit product could wrap or turn negative and
	 * then slip past the MP4_INDEX_LEN clamp as a huge read length.
	 */
	max_size = (uint64_t)stco_count * co->unit_size;
	read_len = (max_size > MP4_INDEX_LEN) ? MP4_INDEX_LEN : (int)max_size;
	co->cache_count = read_len / co->unit_size;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(co->bufaddr, read_len);
#else
	f_read(&punit->file, co->bufaddr, read_len, NULL);
#endif
	cur_len += read_len;

	/* current_offset now points at the first entry that is not cached */
	co->current_offset += (co->cache_count * co->unit_size);

	/* decode the first chunk offset (big endian) */
	memcpy(&tmp, co->bufaddr, co->unit_size);
	co->chunk_offset = uswap_32(tmp);
	co->cur_sample_in_chunk = 1;
	co->cache_current = 1;
	co->current_count = 1;

exit:
	/* skip whatever part of the box was not consumed above */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief parse the 64-bit chunk offset box ("co64") of the current track
 *
 * |size of atom|type     |version|flag|count of chunk offset|chunk offset|
 * |4           |4("co64")|1      |3   |4                    |8*n         |
 *
 * Same state as the "stco" reader leaves behind, but with 8-byte entries:
 * the first window of the table (at most MP4_INDEX_LEN bytes) is loaded into
 * co->bufaddr and the first offset is decoded.
 * On return the file position is atom.len bytes past where it was on entry.
 *
 * \param[in] punit session unit; punit->current_stream receives the index state
 * \param[in] atom  box header, atom.len is the number of bytes to consume
 * \return    always 0
 */
static
int mp4_read_co64(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	int read_len;
	uint64_t max_size;
	chunk_offset_t *co = &punit->current_stream->chunk_offset;
	uint32_t tmp, co64_count;
	uint64_t tmp64;

	/* only track index 1 and 2 are parsed, any other box is skipped whole */
	if (punit->current_track_index < 1) {
		debug("stream num:%d less than 1!\n", punit->current_track_index);
		goto exit;
	} else if (punit->current_track_index > 2) {
		//debug("support 2 stream(1 video and 1 audio) only!\n");
		goto exit;
	}

	/* skip the 1byte version and 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/* get chunk offset count */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	co64_count = uswap_32(tmp);
	debug("co64(chunk offset)count: %d\n", co64_count);
	co->total_count = co64_count;
	co->origin_offset = f_tell(&punit->file);
	co->current_offset = co->origin_offset;
	co->unit_size = 8;

	/*
	 * read the first data
	 * The entry count comes straight from the file, so the table size is
	 * computed in 64 bits: a 32-bit product could wrap or turn negative and
	 * then slip past the MP4_INDEX_LEN clamp as a huge read length.
	 */
	max_size = (uint64_t)co64_count * co->unit_size;
	read_len = (max_size > MP4_INDEX_LEN) ? MP4_INDEX_LEN : (int)max_size;
	co->cache_count = read_len / co->unit_size;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(co->bufaddr, read_len);
#else
	f_read(&punit->file, co->bufaddr, read_len, NULL);
#endif
	cur_len += read_len;

	/* current_offset now points at the first entry that is not cached */
	co->current_offset += (co->cache_count * co->unit_size);

	/* decode the first chunk offset (64-bit big endian) */
	memcpy(&tmp64, co->bufaddr, co->unit_size);
	co->chunk_offset = uswap_64(tmp64);
	co->cur_sample_in_chunk = 1;
	co->cache_current = 1;
	co->current_count = 1;

exit:
	/* skip whatever part of the box was not consumed above */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief parse the sample size box ("stsz") of the current track
 *
 * |size of atom|type     |version|flag|sample size|sample count|size per sample|
 * |4           |4("stsz")|1      |3   |4          |4           |4*n            |
 *
 * Records where the per-sample size table sits in the file, loads the first
 * window of it (at most MP4_INDEX_LEN bytes) into sz->bufaddr and decodes the
 * size of the first sample.
 * On return the file position is atom.len bytes past where it was on entry.
 *
 * \param[in] punit session unit; punit->current_stream receives the index state
 * \param[in] atom  box header, atom.len is the number of bytes to consume
 * \return    always 0
 */
static
int mp4_read_stsz(session_unit_t *punit, mp4_box_header_t atom)
{
	int total_len = atom.len;
	int cur_len = 0;
	int read_len;
	uint64_t max_size;
	sample_size_t *sz = &punit->current_stream->sample_size;
	uint32_t tmp, sample_count;

	/* only track index 1 and 2 are parsed, any other box is skipped whole */
	if (punit->current_track_index < 1) {
		debug("stream num:%d less than 1!\n", punit->current_track_index);
		goto exit;
	} else if (punit->current_track_index > 2) {
		//debug("support 2 stream(1 video and 1 audio) only!\n");
		goto exit;
	}

	/* skip the 1byte version and 3bytes flags */
	f_lseek(&punit->file, f_tell(&punit->file) + 4); cur_len += 4;

	/*
	 * The default sample size field is read only to step over it.
	 * NOTE(review): ISO/IEC 14496-12 defines a non-zero value here as "all
	 * samples share this size and no table follows"; that case is not
	 * handled by this reader - confirm whether such files must be supported.
	 */
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	//debug("stsz sample size: %d\n", uswap_32(tmp));
	f_read(&punit->file, &tmp, 4, NULL); cur_len += 4;
	sample_count = uswap_32(tmp);
	//debug("stsz sample count: %d\n", sample_count);

	sz->total_count = sample_count;
	sz->origin_offset = f_tell(&punit->file);
	sz->current_offset = sz->origin_offset;
	sz->unit_size = 4;

	/*
	 * read the first data
	 * The sample count comes straight from the file, so the table size is
	 * computed in 64 bits: a 32-bit product could wrap or turn negative and
	 * then slip past the MP4_INDEX_LEN clamp as a huge read length.
	 */
	max_size = (uint64_t)sample_count * sz->unit_size;
	read_len = (max_size > MP4_INDEX_LEN) ? MP4_INDEX_LEN : (int)max_size;
	sz->cache_count = read_len / sz->unit_size;

#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
	read_buf_4bytes_align(sz->bufaddr, read_len);
#else
	f_read(&punit->file, sz->bufaddr, read_len, NULL);
#endif
	cur_len += read_len;

	/* current_offset now points at the first entry that is not cached */
	sz->current_offset += (sz->cache_count * sz->unit_size);

	/* decode the size of the first sample (big endian) */
	memcpy(&tmp, sz->bufaddr, sz->unit_size);
	sz->sample_size = uswap_32(tmp);
	sz->cache_current = 1;
	sz->current_count = 1;

exit:
	/* skip whatever part of the box was not consumed above */
	f_lseek(&punit->file, f_tell(&punit->file) + total_len - cur_len);

	return 0;
}

/*!
 * \brief   mp4 file format probe
 *
 * Reads the first 16 bytes of the file into the session load buffer
 * (allocating that buffer when the session has none yet) and checks the box
 * type stored at byte offset 4.
 *
 * \param[in] punit session unit holding the opened file and the load buffer
 * \return    0 when the first box is "moov", "mdat" or "ftyp",
 *            -EPROTONOSUPPORT for any other box type,
 *            -1 when no load buffer could be allocated or load_size <= 0
 */
static
int mp4_probe(session_unit_t *punit)
{
	psysbuf_t load_buf;
	int ret = -EPROTONOSUPPORT;
	unsigned int tag;
	char probe_data[16]; /* only its size is used: number of bytes probed */

	load_buf = punit->load_buf;
	if (load_buf == NULL) {
		load_buf = sysbuf_alloc(SYSBUF_GROUP_DATBUFS);
		if (load_buf == NULL)
			return -1;
		/* the session keeps the buffer, also on the error paths below */
		punit->load_buf = load_buf;
	}

	if(punit->load_size <= 0) {
		/* the format string expects the size: pass it instead of reading garbage */
		debug("The file size: %d error!\n", (int)punit->load_size);
		return -1;
	}

	f_read(&punit->file, (void*)HWADDR(load_buf->haddr), sizeof(probe_data), NULL);
	load_buf->size = sizeof(probe_data);

	/* skip box length, get box type */
	memcpy(&tag, (void*)HWADDR(load_buf->haddr) + 4, sizeof(tag));

	switch (tag) {
	case MKTAG('m','o','o','v'): // metadata box
	case MKTAG('m','d','a','t'): // media data box
	case MKTAG('f','t','y','p'): // file type box
		ret = 0;
		break;
	default:
		break;
	}

	return ret;
}

/*!
 * \brief walk the child boxes of a container box
 *
 * For every child the header is read, a parser is looked up in
 * mp4_parser_table by box type and called with the child's payload length;
 * children without a parser are skipped with a seek.
 *
 * \param[in] punit session unit holding the opened file
 * \param[in] atom  container box, atom.len is the length of its content
 * \return    0, or the first non-zero value returned by a child parser
 */
static
int mp4_read_parentbox(session_unit_t *punit, mp4_box_header_t atom)
{
	int idx;
	int ret = 0;
	uint32_t word;
	uint64_t ext_size;
	uint64_t consumed = 0;
	mp4_box_header_t child;

	/* a container shorter than one box header cannot hold any child */
	if (!(atom.len >= 8)) {
		debug("parent box length error:%d\n", atom.len);
		return 0;
	}

	while (consumed <= (atom.len - 8)) {
		int (*handler)(session_unit_t *, mp4_box_header_t) = NULL;

		/* child header: 4 bytes big endian size, 4 bytes type */
		f_read(&punit->file, &word, 4, NULL);
		child.len = uswap_32(word);
		f_read(&punit->file, &word, 4, NULL);
		child.type = word;
		consumed += 8;

		/* size == 1 announces a 64 bit size right after the type */
		if (child.len == 1 && consumed + 8 <= atom.len) {
			f_read(&punit->file, &ext_size, 8, NULL);
			child.len = uswap_64(ext_size) - 8;
			consumed += 8;
			debug("The full box len:%ld\n", child.len);
		}

		/* keep the payload length only; stop on an undersized box */
		child.len -= 8;
		if (child.len < 0)
			break;
		/* never let a child run past the end of its parent */
		child.len = min(child.len, atom.len - consumed);

		/* scan the table up to its zero-type terminator */
		idx = 0;
		while (mp4_parser_table[idx].type && mp4_parser_table[idx].type != child.type)
			idx++;
		if (mp4_parser_table[idx].type)
			handler = mp4_parser_table[idx].parser;

		if (handler) {
			ret = handler(punit, child);
			if (ret)
				break;
			//!!!: skip garbage at atom end
		} else {
			/* unknown box: step over its payload */
			f_lseek(&punit->file, f_tell(&punit->file) + child.len);
		}

		consumed += child.len;
	}

	return ret;
}

/*!
 * \brief     get stream information
 *
 * Treats the whole file as one container box of punit->load_size bytes and
 * runs the box walker over it from offset 0.
 *
 * \param[in] punit session unit holding the opened file
 * \return    always 0 (the result of the box walk is not propagated)
 */
static
int mp4_streaminfo_get(session_unit_t *punit)
{
	mp4_box_header_t root = {MKTAG('r','o','o','t')};

	/* no track has been seen yet */
	punit->current_track_index = 0;

	/* rewind to origin, the walk starts with the first top level box */
	f_lseek(&punit->file, 0);

	root.len = punit->load_size;
	mp4_read_parentbox(punit, root);

	return 0;
}

/*!
 * \brief	step the decoding time (stts box) cursor by one entry,
 *			reloading the cached window of the table from the file if needed,
 *			and decode the entry the cursor lands on
 *
 * \param[in] punit  session unit holding the opened file
 * \param[in] direct FORWARD moves to the next entry, anything else rewinds
 * \param[in] dt     stts index state of one track (updated in place)
 */
static inline
void update_decoding_time(session_unit_t *punit, direction_t direct, decode_time_to_sample_t *dt)
{
	uint32_t tmp;
	int addr;

	if (direct == FORWARD) {
		/* update decoding time(stts box) buffer if need */
		dt->cache_current++;
		if (dt->cache_current > dt->cache_count) {
			/* window exhausted: load the next one, clamped to MP4_INDEX_LEN */
			tmp = (dt->total_count - dt->current_count) * dt->unit_size;
			dt->cache_count = (tmp > MP4_INDEX_LEN ? MP4_INDEX_LEN : tmp) / dt->unit_size;
			f_lseek(&punit->file, dt->current_offset);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			uint32_t read_len = dt->cache_count * dt->unit_size;
			read_buf_4bytes_align(dt->bufaddr, read_len);
#else
			f_read(&punit->file, dt->bufaddr, dt->cache_count * dt->unit_size, NULL);
#endif
			dt->current_offset += (dt->cache_count * dt->unit_size);
			dt->cache_current = 1;
		}
		dt->current_count++;

		/* get one decoding time data; a new entry starts at its first sample */
		dt->cur_sample_read_num = 1;
		memcpy(&tmp, dt->bufaddr + (dt->cache_current - 1) * dt->unit_size, 4);
		dt->sample_count = uswap_32(tmp);
		memcpy(&tmp, dt->bufaddr + (dt->cache_current - 1) * dt->unit_size + 4, 4);
		dt->sample_delta = uswap_32(tmp);

		/*
		 * NOTE(review): current_offset is advanced per entry here although
		 * it was already advanced by a whole window on reload, and REWIND
		 * has no matching decrement - confirm against the stts box reader.
		 */
		dt->current_offset += dt->unit_size;
	} else { // REWIND
		dt->cache_current--;
		if (dt->cache_current < 1) {
			/* window exhausted: load the full window that precedes it */
			dt->current_offset -= (dt->cache_count * dt->unit_size);
			dt->cache_count = MP4_INDEX_LEN / dt->unit_size;
			addr = dt->current_offset - (dt->cache_count * dt->unit_size);
			f_lseek(&punit->file, addr);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			uint32_t read_len = dt->cache_count * dt->unit_size;
			read_buf_4bytes_align(dt->bufaddr, read_len);
#else
			f_read(&punit->file, dt->bufaddr, dt->cache_count * dt->unit_size, NULL);
#endif
			dt->cache_current = dt->cache_count;
		}
		dt->current_count--;

		memcpy(&tmp, dt->bufaddr + (dt->cache_current - 1) * dt->unit_size, 4);
		dt->sample_count = uswap_32(tmp);
		memcpy(&tmp, dt->bufaddr + (dt->cache_current - 1) * dt->unit_size + 4, 4);
		dt->sample_delta = uswap_32(tmp);
		/*
		 * Stepping backwards lands on the last sample of the entry that was
		 * just decoded, so the position must come from its own sample count
		 * and not from the count of the entry that was left.
		 */
		dt->cur_sample_read_num = dt->sample_count;
	}
}

/*!
 * \brief	step the composition time to sample (ctts box) cursor by one entry,
 *			reloading the cached window of the table from the file if needed,
 *			and decode the entry the cursor lands on
 *
 * \param[in] punit  session unit holding the opened file
 * \param[in] direct FORWARD moves to the next entry, anything else rewinds
 * \param[in] ct     ctts index state of one track (updated in place)
 */
static inline
void update_composition_time(session_unit_t *punit, direction_t direct, composition_time_to_sample_t *ct)
{
	uint32_t tmp;
	int addr;

	if (direct == FORWARD) {
		/* update composition time to sample(ctts box) buffer if need */
		ct->cache_current++;
		if (ct->cache_current > ct->cache_count) {
			/* window exhausted: load the next one, clamped to MP4_INDEX_LEN */
			tmp = (ct->total_count - ct->current_count) * ct->unit_size;
			ct->cache_count = (tmp > MP4_INDEX_LEN ? MP4_INDEX_LEN : tmp) / ct->unit_size;
			f_lseek(&punit->file, ct->current_offset);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			uint32_t read_len = ct->cache_count * ct->unit_size;
			read_buf_4bytes_align(ct->bufaddr, read_len);
#else
			f_read(&punit->file, ct->bufaddr, ct->cache_count * ct->unit_size, NULL);
#endif
			ct->current_offset += (ct->cache_count * ct->unit_size);
			ct->cache_current = 1;
		}
		ct->current_count++;

		/* get one composition time to sample data; a new entry starts at its first sample */
		ct->cur_sample_read_num = 1;
		memcpy(&tmp, ct->bufaddr + (ct->cache_current - 1) * ct->unit_size, 4);
		ct->sample_count = uswap_32(tmp);
		memcpy(&tmp, ct->bufaddr + (ct->cache_current - 1) * ct->unit_size + 4, 4);
		ct->sample_offset = uswap_32(tmp);
		/*
		 * NOTE(review): current_offset is advanced per entry here although
		 * it was already advanced by a whole window on reload, and REWIND
		 * has no matching decrement - confirm against the ctts box reader.
		 */
		ct->current_offset += ct->unit_size;
	} else { // REWIND
		ct->cache_current--;
		if (ct->cache_current < 1) {
			/* window exhausted: load the full window that precedes it */
			ct->current_offset -= (ct->cache_count * ct->unit_size);
			ct->cache_count = MP4_INDEX_LEN / ct->unit_size;
			addr = ct->current_offset - (ct->cache_count * ct->unit_size);
			f_lseek(&punit->file, addr);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			uint32_t read_len = ct->cache_count * ct->unit_size;
			read_buf_4bytes_align(ct->bufaddr, read_len);
#else
			f_read(&punit->file, ct->bufaddr, ct->cache_count * ct->unit_size, NULL);
#endif
			ct->cache_current = ct->cache_count;
		}
		ct->current_count--;

		memcpy(&tmp, ct->bufaddr + (ct->cache_current - 1) * ct->unit_size, 4);
		ct->sample_count = uswap_32(tmp);
		memcpy(&tmp, ct->bufaddr + (ct->cache_current - 1) * ct->unit_size + 4, 4);
		ct->sample_offset = uswap_32(tmp);
		/*
		 * Stepping backwards lands on the last sample of the entry that was
		 * just decoded, so the position must come from its own sample count
		 * and not from the count of the entry that was left.
		 */
		ct->cur_sample_read_num = ct->sample_count;
	}
}

/*!
 * \brief	step the sample size (stsz box) cursor by one sample,
 *			reloading the cached window of the table from the file if needed,
 *			and decode the size of the sample the cursor lands on
 *
 * \param[in] punit  session unit holding the opened file
 * \param[in] direct FORWARD moves to the next sample, anything else rewinds
 * \param[in] ss     stsz index state of one track (updated in place)
 */
static inline
void update_sample_size(session_unit_t *punit, direction_t direct, sample_size_t *ss)
{
	uint32_t word;
	uint32_t remain;
	uint32_t block_len;
	int seek_to;

	if (direct != FORWARD) { // REWIND
		ss->cache_current -= 1;
		if (ss->cache_current < 1) {
			/* fell off the front of the window: load the full window before it */
			ss->current_offset -= (ss->cache_count * ss->unit_size);
			ss->cache_count = MP4_INDEX_LEN / ss->unit_size;
			seek_to = ss->current_offset - (ss->cache_count * ss->unit_size);
			f_lseek(&punit->file, seek_to);
			block_len = ss->cache_count * ss->unit_size;
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			read_buf_4bytes_align(ss->bufaddr, block_len);
#else
			f_read(&punit->file, ss->bufaddr, block_len, NULL);
#endif
			ss->cache_current = ss->cache_count;
		}
		ss->current_count -= 1;
	} else {
		ss->cache_current += 1;
		if (ss->cache_current > ss->cache_count) {
			/* fell off the end of the window: load what follows, clamped */
			remain = (ss->total_count - ss->current_count) * ss->unit_size;
			if (remain > MP4_INDEX_LEN)
				remain = MP4_INDEX_LEN;
			ss->cache_count = remain / ss->unit_size;
			f_lseek(&punit->file, ss->current_offset);
			block_len = ss->cache_count * ss->unit_size;
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			read_buf_4bytes_align(ss->bufaddr, block_len);
#else
			f_read(&punit->file, ss->bufaddr, block_len, NULL);
#endif
			ss->current_offset += (ss->cache_count * ss->unit_size);
			ss->cache_current = 1;
		}
		ss->current_count += 1;
	}

	/* both directions end by decoding the entry under the cursor (big endian) */
	memcpy(&word, ss->bufaddr + (ss->cache_current - 1) * ss->unit_size, 4);
	ss->sample_size = uswap_32(word);
}

/*!
 * \brief	step the chunk offset (stco or co64 box) cursor by one chunk,
 *			reloading the cached window of the table from the file if needed,
 *			and decode the offset of the chunk the cursor lands on
 *
 * \param[in] punit  session unit holding the opened file
 * \param[in] direct FORWARD moves to the next chunk, anything else rewinds
 * \param[in] co     chunk offset index state of one track (updated in place)
 */
static inline
void update_chunk_offset(session_unit_t *punit, direction_t direct, chunk_offset_t *co)
{
	uint32_t word32;
	uint64_t word64;
	uint32_t remain;
	uint32_t block_len;
	int seek_to;

	if (direct != FORWARD) { // REWIND
		co->cache_current -= 1;
		if (co->cache_current < 1) {
			/* fell off the front of the window: load the full window before it */
			co->current_offset -= (co->cache_count * co->unit_size);
			co->cache_count = MP4_INDEX_LEN / co->unit_size;
			seek_to = co->current_offset - (co->cache_count * co->unit_size);
			f_lseek(&punit->file, seek_to);
			block_len = co->cache_count * co->unit_size;
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			read_buf_4bytes_align(co->bufaddr, block_len);
#else
			f_read(&punit->file, co->bufaddr, block_len, NULL);
#endif
			co->cache_current = co->cache_count;
		}
		co->current_count -= 1;
	} else {
		co->cache_current += 1;
		if (co->cache_current > co->cache_count) {
			/* fell off the end of the window: load what follows, clamped */
			remain = (co->total_count - co->current_count) * co->unit_size;
			if (remain > MP4_INDEX_LEN)
				remain = MP4_INDEX_LEN;
			co->cache_count = remain / co->unit_size;
			f_lseek(&punit->file, co->current_offset);
			block_len = co->cache_count * co->unit_size;
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			read_buf_4bytes_align(co->bufaddr, block_len);
#else
			f_read(&punit->file, co->bufaddr, block_len, NULL);
#endif
			co->current_offset += (co->cache_count * co->unit_size);
			co->cache_current = 1;
		}
		co->current_count += 1;
	}

	/* decode the entry under the cursor: 4 byte (stco) or 8 byte (co64) */
	if (co->unit_size != 4) {
		memcpy(&word64, co->bufaddr + (co->cache_current - 1) * co->unit_size, co->unit_size);
		co->chunk_offset = uswap_64(word64);
	} else {
		memcpy(&word32, co->bufaddr + (co->cache_current - 1) * co->unit_size, co->unit_size);
		co->chunk_offset = uswap_32(word32);
	}
}

/*!
 * \brief	step the sample to chunk (stsc box) cursor by one entry,
 *			reloading the cached window of the table from the file if needed,
 *			and decode the entry the cursor lands on
 *
 * Besides first_chunk and samples_per_chunk of the current entry, the
 * forward path also peeks at the following entry to fill next_first_chunk.
 *
 * \param[in] punit  session unit holding the opened file
 * \param[in] direct FORWARD moves to the next entry, anything else rewinds
 * \param[in] sc     stsc index state of one track (updated in place)
 */
static inline
void update_sample_to_chunk(session_unit_t *punit, direction_t direct, sample_to_chunk_t *sc)
{
	uint32_t tmp;
	int addr;

	if (direct == FORWARD) {
		sc->cache_current++;
		if (sc->cache_current > sc->cache_count) {
			/* window exhausted: load the next one, clamped to MP4_INDEX_LEN */
			tmp = (sc->total_count - sc->current_count) * sc->unit_size;
			sc->cache_count = (tmp > MP4_INDEX_LEN ? MP4_INDEX_LEN : tmp) / sc->unit_size;
			f_lseek(&punit->file, sc->current_offset);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			uint32_t read_len = sc->cache_count * sc->unit_size;
			read_buf_4bytes_align(sc->bufaddr, read_len);
#else
			f_read(&punit->file, sc->bufaddr, sc->cache_count * sc->unit_size, NULL);
#endif
			sc->current_offset += (sc->cache_count * sc->unit_size);
			sc->cache_current = 1;
		}
		sc->current_count++;

		/* decode the current entry (the sample description id is ignored) */
		memcpy(&tmp, sc->bufaddr + (sc->cache_current - 1) * sc->unit_size, 4);
		sc->first_chunk = uswap_32(tmp);
		memcpy(&tmp, sc->bufaddr + (sc->cache_current - 1) * sc->unit_size + 4, 4);
		sc->samples_per_chunk = uswap_32(tmp);
		if (sc->current_count >= sc->total_count) {
			/* last entry: there is no following entry to peek at */
			sc->next_first_chunk = sc->first_chunk;
		} else {
			/* peek at the following entry; cache_current is bumped only to test the window edge */
			sc->cache_current++;
			if (sc->cache_current > sc->cache_count) {
				/* the following entry is outside the window: load the next window now */
				tmp = (sc->total_count - sc->current_count) * sc->unit_size;
				sc->cache_count = (tmp > MP4_INDEX_LEN ? MP4_INDEX_LEN : tmp) / sc->unit_size;
				f_lseek(&punit->file, sc->current_offset);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
				uint32_t read_len = sc->cache_count * sc->unit_size;
				read_buf_4bytes_align(sc->bufaddr, read_len);
#else
				f_read(&punit->file, sc->bufaddr, sc->cache_count * sc->unit_size, NULL);
#endif
				sc->current_offset += (sc->cache_count * sc->unit_size);
				/* 0 so that the next forward step lands on slot 1 of the new window */
				sc->cache_current = 0;
				memcpy(&tmp, sc->bufaddr, 4);
				sc->next_first_chunk = uswap_32(tmp);
			} else {
				/* undo the bump; the following entry sits 12 bytes after the current one */
				sc->cache_current--;
				memcpy(&tmp, sc->bufaddr + (sc->cache_current - 1) * sc->unit_size + 12, 4);
				sc->next_first_chunk = uswap_32(tmp);
			}
		}
	} else { // REWIND
		/*
		 * NOTE(review): after the look-ahead reload above (cache_current == 0)
		 * this path reloads the previous window and lands on its last slot,
		 * which looks like the entry that was already current - verify.
		 */
		sc->cache_current--;
		if (sc->cache_current < 1) {
			/* window exhausted: load the full window that precedes it */
			sc->current_offset -= (sc->cache_count * sc->unit_size);
			sc->cache_count = MP4_INDEX_LEN / sc->unit_size;
			addr = sc->current_offset - (sc->cache_count * sc->unit_size);
			f_lseek(&punit->file, addr);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			uint32_t read_len = sc->cache_count * sc->unit_size;
			read_buf_4bytes_align(sc->bufaddr, read_len);
#else
			f_read(&punit->file, sc->bufaddr, sc->cache_count * sc->unit_size, NULL);
#endif
			sc->cache_current = sc->cache_count;
		}
		sc->current_count--;

		/* the entry being left becomes the "next" one of the entry loaded below */
		sc->next_first_chunk = sc->first_chunk;
		memcpy(&tmp, sc->bufaddr + (sc->cache_current - 1) * sc->unit_size, 4);
		sc->first_chunk = uswap_32(tmp);
		memcpy(&tmp, sc->bufaddr + (sc->cache_current - 1) * sc->unit_size + 4, 4);
		sc->samples_per_chunk = uswap_32(tmp);
	}
}


/* file scope scratch value; not referenced by the function below */
static int tmp_value = 0;

/*!
 * \brief	step the sync sample (stss box) cursor by one entry,
 *			reloading the cached window of the table from the file if needed,
 *			and decode the sample number the cursor lands on
 *
 * \param[in] punit  session unit holding the opened file
 * \param[in] direct FORWARD moves to the next entry, anything else rewinds
 * \param[in] ss     stss index state of one track (updated in place)
 */
static inline
void update_sync_sample(session_unit_t *punit, direction_t direct, sync_sample_t *ss)
{
	uint32_t word;
	uint32_t remain;
	uint32_t block_len;
	int seek_to;

	if (direct != FORWARD) { // REWIND
		ss->cache_current -= 1;
		if (ss->cache_current < 1) {
			/* fell off the front of the window: load the full window before it */
			ss->current_offset -= (ss->cache_count * ss->unit_size);
			ss->cache_count = MP4_INDEX_LEN / ss->unit_size;
			seek_to = ss->current_offset - (ss->cache_count * ss->unit_size);
			f_lseek(&punit->file, seek_to);
			block_len = ss->cache_count * ss->unit_size;
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			read_buf_4bytes_align(ss->bufaddr, block_len);
#else
			f_read(&punit->file, ss->bufaddr, block_len, NULL);
#endif
			ss->cache_current = ss->cache_count;
		}
		ss->current_count -= 1;
	} else {
		ss->cache_current += 1;
		if (ss->cache_current > ss->cache_count) {
			/* fell off the end of the window: load what follows, clamped */
			remain = (ss->total_count - ss->current_count) * ss->unit_size;
			if (remain > MP4_INDEX_LEN)
				remain = MP4_INDEX_LEN;
			ss->cache_count = remain / ss->unit_size;
			f_lseek(&punit->file, ss->current_offset);
			block_len = ss->cache_count * ss->unit_size;
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
			read_buf_4bytes_align(ss->bufaddr, block_len);
#else
			f_read(&punit->file, ss->bufaddr, block_len, NULL);
#endif
			ss->current_offset += (ss->cache_count * ss->unit_size);
			ss->cache_current = 1;
		}
		ss->current_count += 1;
	}

	/* both directions end by decoding the entry under the cursor (big endian) */
	memcpy(&word, ss->bufaddr + (ss->cache_current - 1) * ss->unit_size, 4);
	ss->sample_number = uswap_32(word);
}

/*!
 * \brief	fetch one packet of media data (mdat box) into the load buffer,
 *			going through the per-track mdat cache when the packet is small
 *
 * Packets of MP4_AVDATA_LEN - 4 bytes or more are read straight from the
 * file. Smaller ones are copied out of track->mdatcache; when the packet is
 * not inside the cached window the window is reloaded at the packet position
 * (rounded down to 4 bytes) and the lookup is tried once more.
 *
 * \param[in] punit     session unit; punit->current_stream owns the cache
 * \param[in] direct    only FORWARD is served, anything else returns at once
 * \param[in] co        chunk offset state, co->chunk_offset is the base address
 * \param[in] read_size number of bytes to deliver
 * \param[in] load_buf  destination buffer
 * \param[in] cur_pos   byte position added to co->chunk_offset
 */
static inline
void get_packet_from_mdat(session_unit_t *punit, direction_t direct, chunk_offset_t *co,
						int32_t read_size, psysbuf_t load_buf, uint32_t cur_pos)
{
	uint32_t fill_len;
	uint64_t remain;
	mp4_track_t *track = punit->current_stream;
	phys_addr_t src, dst;
	int32_t len;
	int pass;

	if (direct != FORWARD)
		return;

	/* too big for the cache window: read it directly */
	if ((read_size + 4) >= MP4_AVDATA_LEN) {
		f_lseek(&punit->file, co->chunk_offset + cur_pos);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
		read_buf_4bytes_align((void *)HWADDR(load_buf->haddr), read_size);
#else
		f_read(&punit->file, (void *)HWADDR(load_buf->haddr), read_size, NULL);
#endif
		return;
	}

	/* pass 0 may reload the window, pass 1 is the single retry after that */
	for (pass = 0; pass < 2; pass++) {
		/* evaluated before any reload moves the window */
		int before_window = !(co->chunk_offset + cur_pos >= track->mdataddr); //modify on 20210111, fix the bug of aac error info

		if (!before_window &&
		    co->chunk_offset + cur_pos + read_size <= track->mdataddr + MP4_AVDATA_LEN) {
			/* packet lies completely inside the cached window */
#ifdef MEDIA_USE_EDMA
			dst = HWADDR(load_buf->haddr);
			src = (phys_addr_t)track->mdatcache + co->chunk_offset + cur_pos - track->mdataddr;
			len = read_size;
			dma_cache_wback_inv(dst, len);
			edma_copy(EDMA_CHN_MEDIA, dst, src, len);
#else
			memcpy((void *)HWADDR(load_buf->haddr), track->mdatcache + co->chunk_offset
					+ cur_pos - track->mdataddr, read_size);
#endif
			return;
		}

		// consume 200~900ms sometimes in data get
		track->mdataddr = (((co->chunk_offset + cur_pos) >> 2) << 2);
		remain = track->mdatendpos - track->mdataddr;
		if (remain > MP4_AVDATA_LEN)
			fill_len = MP4_AVDATA_LEN;
		else
			fill_len = remain;
		/* cache the block mdat */
		f_lseek(&punit->file, track->mdataddr);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
		read_buf_4bytes_align(track->mdatcache, fill_len);
#else
		f_read(&punit->file, track->mdatcache, fill_len, NULL);
#endif

		if (pass == 0)
			continue;

		/* still not served after one reload: read a packet from the file */
		if (before_window)
			debug("cache data not use2\n");
		else
			debug("cache data not use1\n");
		f_lseek(&punit->file, co->chunk_offset + cur_pos);
#ifdef MEDIA_FS_4BYTES_ALIGN_BUG_FIX
		read_buf_4bytes_align((void *)HWADDR(load_buf->haddr), read_size);
#else
		f_read(&punit->file, (void *)HWADDR(load_buf->haddr), read_size, NULL);
#endif
		return;
	}
}

/*!
 * \brief move the index cursors of one track to a given packet number
 *
 * Steps the sample size, chunk offset, sample to chunk, decoding time and
 * composition time cursors one packet at a time, forward or backward, until
 * the sample size cursor (ss->current_count) equals skip_pkt. With
 * MEDIA_AV_SYNC the running dts/pts of the track are adjusted on every step.
 *
 * \param[in] punit      session unit holding both track states
 * \param[in] track_type MP4_TYPE_VIDEO selects the video track, any other
 *                       value the audio track
 * \param[in] skip_pkt   target packet number; negative values are treated as
 *                       0 and the forward walk stops once the cursor has
 *                       passed the last packet of the table
 * \return    always 0
 */
static
int32_t packet_skip(session_unit_t *punit, int track_type, int skip_pkt)
{
	sample_size_t *ss;
	chunk_offset_t *co;
	sample_to_chunk_t *sc;
	decode_time_to_sample_t *dt;
	composition_time_to_sample_t *ct;
#ifdef MEDIA_AV_SYNC
	uint64_t *total_dts ;
	uint64_t *pts = NULL; /* stays NULL for audio: no pts is tracked there */
	float *last_decode_time;
#endif

	if (track_type == MP4_TYPE_VIDEO) {
		ss = &punit->video_stream.sample_size;
		co = &punit->video_stream.chunk_offset;
		sc = &punit->video_stream.chunk_index;
		dt = &punit->video_stream.decode_time;
		ct = &punit->video_stream.composition_time;
#ifdef MEDIA_AV_SYNC
		total_dts = &punit->sync->v_cur_pkt_dts;
		pts = &punit->sync->v_cur_pkt_pts1;
		last_decode_time = &punit->sync->v_last_decode_time;
#endif
	} else {
		ss = &punit->audio_stream.sample_size;
		co = &punit->audio_stream.chunk_offset;
		sc = &punit->audio_stream.chunk_index;
		dt = &punit->audio_stream.decode_time;
		ct = &punit->audio_stream.composition_time;
#ifdef MEDIA_AV_SYNC
		total_dts = &punit->sync->a_cur_pkt_dts;
		last_decode_time = &punit->sync->a_last_decode_time;
#endif
	}
	/* cursor already past the last packet: nothing to walk */
	if (ss->current_count > ss->total_count) {
		return 0;
	}

	/*
	 * A negative target can never be reached by the walks below and would
	 * keep them running over memory outside the index tables. 0 is kept as
	 * the lowest position because the rewind walk has always allowed it.
	 */
	if (skip_pkt < 0)
		skip_pkt = 0;

	/*
	 * fast forward
	 * The second condition stops the walk once the cursor has passed the
	 * last packet; beyond that point the window reloads would be sized from
	 * a wrapped "remaining entries" value.
	 */
	while (ss->current_count < skip_pkt && ss->current_count <= ss->total_count) {
#ifdef MEDIA_AV_SYNC
		*total_dts += dt->sample_delta;
		*last_decode_time = (float)dt->sample_delta;
		if (pts)
			*pts = *total_dts + ct->sample_offset;
#endif
		/* prepare for next times read */
		dt->cur_sample_read_num++;
		if (dt->current_count < dt->total_count) {
			if (dt->cur_sample_read_num > dt->sample_count)
				update_decoding_time(punit, FORWARD, dt);
		}
		if (ct->total_count > 0) {
			ct->cur_sample_read_num++;
			if (ct->current_count < ct->total_count) {
				if (ct->cur_sample_read_num > ct->sample_count)
					update_composition_time(punit, FORWARD, ct);
			}
		}
		if (sc->total_count <= 1) {
			/* have not chunk offset, only the sample offset */
			update_sample_size(punit, FORWARD, ss);
			update_chunk_offset(punit, FORWARD, co);
		} else {
			/* parse the chunk list: the next sample follows the current one */
			co->chunk_offset += ss->sample_size;
			update_sample_size(punit, FORWARD, ss);

			co->cur_sample_in_chunk++;
			if (co->cur_sample_in_chunk > sc->samples_per_chunk) {
				/* update chunk offset */
				co->cur_sample_in_chunk = 1;
				update_chunk_offset(punit, FORWARD, co);

				/* update chunk_index prepare for next, unless on the last entry */
				if (co->current_count >= sc->next_first_chunk &&
				    sc->current_count < sc->total_count)
					update_sample_to_chunk(punit, FORWARD, sc);
			}
		}
	}

	/* fast rewind */
	while (ss->current_count > skip_pkt) {
#ifdef MEDIA_AV_SYNC
		*total_dts -= dt->sample_delta;
		*last_decode_time = (float)dt->sample_delta;
		if (pts)
			*pts = *total_dts + ct->sample_offset;
#endif
		/* prepare for next times read */
		dt->cur_sample_read_num--;
		if (dt->current_count > 1) {
			if (dt->cur_sample_read_num < 1)
				update_decoding_time(punit, REWIND, dt);
		}
		if (ct->total_count > 1) {
			ct->cur_sample_read_num--;
			if (ct->current_count > 1) {
				if (ct->cur_sample_read_num < 1)
					update_composition_time(punit, REWIND, ct);
			}
		}
		if (sc->total_count == 1) {
			/* have not chunk offset, only the sample offset */
			update_sample_size(punit, REWIND, ss);
			update_chunk_offset(punit, REWIND, co);
		} else {
			/* parse the chunk list: step back by the size of the previous sample */
			update_sample_size(punit, REWIND, ss);

			co->chunk_offset -= ss->sample_size;

			co->cur_sample_in_chunk--;
			if (co->cur_sample_in_chunk < 1) {
				/* update chunk offset */
				update_chunk_offset(punit, REWIND, co);

				if (co->current_count < sc->first_chunk)
					update_sample_to_chunk(punit, REWIND, sc);
				co->cur_sample_in_chunk = sc->samples_per_chunk;
			}
		}
	}

	return 0;
}

/*!
 * \brief read one packet, or one slice of a packet, of the current track
 *
 * The track is selected through punit->current_stream and its sample tables
 * (stts/stsz/stco/stsc/ctts) are taken from punit accordingly. The data is
 * fetched into punit->load_buf by get_packet_from_mdat().
 *
 * \param[in]     punit	the session unit; punit->load_buf receives the data
 * \param[in]     try	read mode: the PREVIEW_* modes only peek at the data
 *			(the sample tables are not advanced), the READ_* modes
 *			consume it
 * \param[in]     rlen	number of bytes to read; not used by READ_PACKET, which
 *			always reads the whole sample
 * \param[in,out] pos	byte offset inside the current packet; it is written
 *			back only by READ_FIRST_SLICE, READ_OTHER_CLICE and
 *			READ_CLICE_PURE
 *
 * \return the number of bytes read, the full sample size for PREVIEW_PACKET,
 *         or 0 when the track is exhausted or is neither audio nor video
 */
static
int32_t packet_read(session_unit_t *punit, media_read_opt_t try, uint32_t rlen, uint32_t *pos)
{
	int32_t read_size = -1;
	psysbuf_t load_buf = punit->load_buf;
	uint32_t cur_pos = *pos;
	mp4_track_t *track = punit->current_stream;
	decode_time_to_sample_t *dt;
	sample_size_t *ss;
	chunk_offset_t *co;
	sample_to_chunk_t *sc;
	composition_time_to_sample_t *ct;

	if (track != NULL && MP4_TYPE_AUDIO == track->track_type) {
		dt = &punit->audio_stream.decode_time;
		ss = &punit->audio_stream.sample_size;
		co = &punit->audio_stream.chunk_offset;
		sc = &punit->audio_stream.chunk_index;
		ct = &punit->audio_stream.composition_time;
	} else if (track != NULL && MP4_TYPE_VIDEO == track->track_type) {
		dt = &punit->video_stream.decode_time;
		ss = &punit->video_stream.sample_size;
		co = &punit->video_stream.chunk_offset;
		sc = &punit->video_stream.chunk_index;
		ct = &punit->video_stream.composition_time;
	} else {
		/*
		 * No usable track: the index pointers would stay uninitialized.
		 * Report it like an exhausted stream, which is the only result
		 * the callers handle explicitly.
		 */
		punit->eof = 1;
		return 0;
	}

	/* all samples of this track have been consumed */
	if (ss->current_count > ss->total_count) {
		punit->eof = 1;
		return 0;
	}

	/* choose the start offset inside the packet and the length to read */
	if ((try == PREVIEW_FIRST_SLICE) || (try == READ_FIRST_SLICE)
		|| (try == PREVIEW_PACKET)) {
		cur_pos = 0;
		read_size = rlen;
	} else if (try == READ_PACKET) {
		cur_pos = 0;
		read_size = ss->sample_size;
	} else // PREVIEW_OTHER_CLICE, READ_OTHER_CLICE, READ_CLICE_PURE
		read_size = rlen;

#ifdef MEDIA_AV_SYNC
	/* the timestamps advance once per packet, when its first byte is consumed */
	if (try == READ_PACKET || try == READ_FIRST_SLICE) {
		if (MP4_TYPE_AUDIO == track->track_type) {
			punit->sync->a_cur_pkt_dts += dt->sample_delta;
			punit->sync->a_last_decode_time = (float)dt->sample_delta;
		} else if (MP4_TYPE_VIDEO == track->track_type) {
			punit->sync->v_cur_pkt_dts += dt->sample_delta;
			//the mdat order is decode(dts) order!
			punit->sync->v_cur_pkt_pts1 = punit->sync->v_cur_pkt_dts + (ct->sample_offset + ct->start_offset);
			punit->sync->v_last_decode_time = (float)dt->sample_delta;
		}
	}
#endif

	/* read one packet data or slice data, use buffer */
	get_packet_from_mdat(punit, FORWARD, co, read_size, load_buf, cur_pos);

	load_buf->size = read_size;
	/* a packet preview loads rlen bytes but reports the whole sample size */
	if (try == PREVIEW_PACKET)
		read_size = ss->sample_size;
	/* previews never move the sample tables */
	if ((try == PREVIEW_PACKET) || (try == PREVIEW_FIRST_SLICE)
		|| (try == PREVIEW_OTHER_CLICE))
		goto exit;

	/* slice reads move the tables only once the whole sample has been read */
	if ((try == READ_FIRST_SLICE) || (try == READ_OTHER_CLICE)
		|| (try == READ_CLICE_PURE)) {
		cur_pos += read_size;
		if (cur_pos < ss->sample_size)
			goto exit;
	}

	/* prepare for next times read */
	if (dt->current_count <= dt->total_count) {
		dt->cur_sample_read_num++;
		if (dt->cur_sample_read_num > dt->sample_count)
			update_decoding_time(punit, FORWARD, dt);
	}
	if (ct->total_count > 0 && ct->current_count <= ct->total_count) {
		ct->cur_sample_read_num++;
		if (ct->cur_sample_read_num > ct->sample_count)
			update_composition_time(punit, FORWARD, ct);
	}
	if (sc->total_count <= 1) {
		/* have not chunk offset, only the sample offset */
		update_sample_size(punit, FORWARD, ss);
		update_chunk_offset(punit, FORWARD, co);
	} else {
		/* parse the chunk list */
		co->chunk_offset += ss->sample_size;
		update_sample_size(punit, FORWARD, ss);

		co->cur_sample_in_chunk++;
		if (co->cur_sample_in_chunk > sc->samples_per_chunk) {
			/* update chunk offset */
			co->cur_sample_in_chunk = 1;
			update_chunk_offset(punit, FORWARD, co);

			/* update chunk_index prepare for next, unless it is the last entry */
			if (co->current_count >= sc->next_first_chunk
				&& sc->current_count < sc->total_count)
				update_sample_to_chunk(punit, FORWARD, sc);
		}
	}

exit:
	if ((try == READ_FIRST_SLICE) || (try == READ_OTHER_CLICE)
		|| (try == READ_CLICE_PURE))
		*pos = cur_pos;

	return read_size;
}

/*
 * Return the buffer used to send video data to the video session, or NULL
 * when there is no video session, no buffer can be obtained, or no video
 * packet writer is installed. An already cached buffer is reused; a new one
 * is cached in punit->loadbuf_video. The buffer offset is reset to 0.
 */
static
psysbuf_t alloc_video_buf(session_unit_t *punit)
{
	psysbuf_t vbuf;

	if (!punit->session_video)
		return NULL;

	/* reuse the cached buffer, otherwise take a fresh one and cache it */
	vbuf = punit->loadbuf_video;
	if (!vbuf) {
		vbuf = sysbuf_alloc(SYSBUF_GROUP_CV_BITBUFS);
		if (!vbuf)
			return NULL;
		punit->loadbuf_video = vbuf;
	}

	if (!punit->loadbuf_video || !punit->video_write_packet)
		return NULL;

	vbuf->offset = 0;
	return vbuf;
}

/*
 * Hand a filled video buffer over to the video session. An empty buffer
 * (size <= 0) is not pushed and stays cached in punit->loadbuf_video.
 */
static
void push_video_buf(session_unit_t *punit, psysbuf_t buf)
{
	if (!(buf->size > 0))
		return;

	/* drop the cached reference before the buffer is pushed */
	punit->loadbuf_video = NULL;
	SessionBufferPush(punit->session_video, buf);
}

/*
 * Return the buffer used to send audio data to the audio session, or NULL
 * when there is no audio session, no buffer can be obtained, or no audio
 * packet writer is installed. An already cached buffer is reused; a new one
 * is cached in punit->loadbuf_audio.
 */
static
psysbuf_t alloc_audio_buf(session_unit_t *punit)
{
	psysbuf_t buf;

	/* get audio send buffer */
	if (punit->session_audio == NULL)
		return NULL;
	buf = punit->loadbuf_audio;
	if (buf == NULL) {
		buf = sysbuf_alloc(SYSBUF_GROUP_DATBUFS);
		if (buf == NULL)
			return NULL;
		punit->loadbuf_audio = buf;
	}
	if (!punit->audio_write_packet)
		return NULL;

	/*
	 * Same as alloc_video_buf(): the caller writes at haddr + offset, so
	 * always hand out the buffer with the write offset at its start.
	 */
	buf->offset = 0;
	return buf;
}

/*
 * Hand a filled audio buffer over to the audio session. An empty buffer
 * (size <= 0) is not pushed and stays cached in punit->loadbuf_audio.
 */
static
void push_audio_buf(session_unit_t *punit, psysbuf_t buf)
{
	if (!(buf->size > 0))
		return;

	/* drop the cached reference before the buffer is pushed */
	punit->loadbuf_audio = NULL;
	SessionBufferPush(punit->session_audio, buf);
}

/*
 * Get packet data: load one packet from the mp4 file and push it to a child
 * session.
 *
 * Successive calls alternate between the two tracks (odd calls handle audio,
 * even calls handle video). A packet is read into punit->load_buf, converted
 * by the installed video/audio packet writer into a send buffer, and that
 * buffer is pushed to the child session. A video packet that does not fit
 * into one buffer is sent slice by slice. When a track reports its end, both
 * child sessions are stopped and detached.
 *
 * Returns the size of the load buffer, or -1 when no load buffer could be
 * allocated.
 */
static
long SESSIONAPI(StreamLoad)(session_unit_t *punit)
{
	psysbuf_t load_buf, buf;
	int32_t ret = 0;
	long loaded_size;
	sample_size_t *ss = &punit->video_stream.sample_size;
	sample_size_t *ass = &punit->audio_stream.sample_size;
	uint32_t read_size;
	uint32_t pos = 0; // packet_read() reads *pos on entry in every mode
	nalustate_t nalstate;
	int slice_flag = 0;
	int32_t remain_size;
	int32_t total_packet_size, cur_packet_size = 0;
	int first_enter = 0;
	media_read_opt_t read_preview_opt, read_data_opt;
	media_bsf_opt_t bsf_preview_opt, bsf_data_opt;
	int block_len;
	static uint32_t switch_flag = 0;

	/* the audio path hands nalstate to the packet writer without filling it */
	memset(&nalstate, 0, sizeof(nalstate));

	/* get the buffer parse packet of mp4 */
	load_buf = punit->load_buf;
	if (load_buf == NULL) {
		load_buf = sysbuf_alloc(SYSBUF_GROUP_DATBUFS);
		if (load_buf == NULL)
			return -1;
		punit->load_buf = load_buf;
	}

	block_len = load_buf->maxsize;

	switch_flag++;
	if (switch_flag % 2) // temporary fix the bug of plenty of time resumed in StreamLoad()
		goto audio;
	/* parse video */
	if (punit->session_video != NULL) {
		if (punit->session_video->state != SSTATE_RUNNING_IDLE)
			goto audio;
	} else {
		goto audio;
	}
	videoconfig_t vcfg = {0};
	vcfg.metadata = punit->video_stream.metadata;
	vcfg.metadata_len = punit->video_stream.metadata_len;
	/* read video packet data, parse packet and put to video module */
	if (!(buf = alloc_video_buf(punit)))
		goto audio;
	punit->current_stream = &punit->video_stream;
	/* peek at the packet: the return value is the whole packet size */
	read_size = packet_read(punit, PREVIEW_PACKET, 8, &pos);
	total_packet_size = read_size;
	if (read_size == 0) { // the stream end
		ret = SessionCommand(punit->session_video, SSCMD_STREAM_STOP, NULL);
		punit->session_video = NULL;
		if (punit->session_audio != NULL) {
			ret = SessionCommand(punit->session_audio, SSCMD_STREAM_STOP, NULL);
			punit->session_audio = NULL;
		}
		goto exit;
	}
	if (read_size <= block_len) {
		/* the raw packet fits: ask the writer how large the converted packet is */
		read_size = packet_read(punit, PREVIEW_PACKET, read_size, &pos);
		nalstate.total_len = total_packet_size - cur_packet_size;
		ret = punit->video_write_packet(&vcfg, GET_PACKET_LEN, &nalstate, 0, &remain_size,
				load_buf->haddr, read_size);
		if (ret <= block_len) {
			if (!(buf = alloc_video_buf(punit)))
				goto audio;
			read_size = packet_read(punit, READ_PACKET, read_size, &pos);
			nalstate.total_len = total_packet_size - cur_packet_size;

#ifdef MEDIA_AV_SYNC
			buf->flags |= SYS_BUF_FLAG_TOP_PRESENT;
			buf->user.nHighPart = ss->current_count - 1;
			buf->user.nLowPart = punit->sync->v_cur_pkt_pts1;
			buf->offset = 0;
#endif
			ret = punit->video_write_packet(&vcfg, GET_PACKET_DATA, &nalstate,
					buf->haddr + buf->offset, &buf->size, load_buf->haddr, read_size);
			push_video_buf(punit, buf);
		} else
			slice_flag = 1;
	} else
		slice_flag = 1;

	/*
	 * NOTE(review): pos, cur_packet_size and first_enter are locals, so a
	 * "goto audio" taken inside the loop below (no send buffer available)
	 * appears to restart the same packet from its beginning on a later
	 * call - confirm this is intended.
	 */
	if (slice_flag) { // if packet data size is large than buffer length
		while (cur_packet_size < total_packet_size) {
			if (first_enter == 0) {
				read_preview_opt = PREVIEW_FIRST_SLICE;
				bsf_preview_opt = GET_FIRST_SLICE_LEN;
				read_data_opt = READ_FIRST_SLICE;
				bsf_data_opt = GET_FIRST_SLICE_DATA;
			} else {
				read_preview_opt = PREVIEW_OTHER_CLICE;
				bsf_preview_opt = GET_OTHER_CLICE_LEN;
				read_data_opt = READ_OTHER_CLICE;
				bsf_data_opt = GET_OTHER_CLICE_DATA;
			}

			read_size = packet_read(punit, read_preview_opt, 8, &pos);
			nalstate.total_len = total_packet_size - cur_packet_size;
			ret = punit->video_write_packet(&vcfg, bsf_preview_opt, &nalstate, 0, &remain_size,
					load_buf->haddr, total_packet_size - cur_packet_size);
			if (ret <= block_len) {
				if (!(buf = alloc_video_buf(punit)))
					goto audio;
				read_size = packet_read(punit, read_data_opt, nalstate.nallen + 4, &pos);
				nalstate.total_len = total_packet_size - cur_packet_size;
#ifdef MEDIA_AV_SYNC
				if (read_data_opt == READ_FIRST_SLICE)
					buf->flags |= SYS_BUF_FLAG_TOP_PRESENT;
				else
					buf->flags &= ~SYS_BUF_FLAG_TOP_PRESENT;
				buf->user.nHighPart = ss->current_count - 1;
				buf->user.nLowPart = punit->sync->v_cur_pkt_pts1;
				buf->offset = 0;
#endif
				ret = punit->video_write_packet(&vcfg, bsf_data_opt, &nalstate,
						buf->haddr + buf->offset, &buf->size, load_buf->haddr, read_size);
				cur_packet_size += read_size;
				push_video_buf(punit, buf);
			} else { // if slice data large than block
				if (!(buf = alloc_video_buf(punit)))
					goto audio;
				remain_size = nalstate.nallen + 4;
				read_size = packet_read(punit, read_data_opt, block_len - (ret - (nalstate.nallen + 4)), &pos);
				nalstate.total_len = total_packet_size - cur_packet_size;
#ifdef MEDIA_AV_SYNC
				if (read_data_opt == READ_FIRST_SLICE)
					buf->flags |= SYS_BUF_FLAG_TOP_PRESENT;
				else
					buf->flags &= ~SYS_BUF_FLAG_TOP_PRESENT;
				buf->user.nHighPart = ss->current_count - 1;
				buf->user.nLowPart = punit->sync->v_cur_pkt_pts1;
				buf->offset = 0;
#endif
				ret = punit->video_write_packet(&vcfg, bsf_data_opt, &nalstate,
						buf->haddr + buf->offset, &buf->size, load_buf->haddr, read_size);
				cur_packet_size += read_size;
				remain_size -= read_size;
				push_video_buf(punit, buf);
				/* send the rest of the slice in blocks of at most block_len bytes */
				while (remain_size > 0) {
					if (!(buf = alloc_video_buf(punit)))
						goto audio;
					if (remain_size < block_len)
						read_size = remain_size;
					else
						read_size = block_len;
					read_size = packet_read(punit, READ_CLICE_PURE, read_size, &pos);
					nalstate.total_len = total_packet_size - cur_packet_size;
#ifdef MEDIA_AV_SYNC
					buf->flags &= ~SYS_BUF_FLAG_TOP_PRESENT;
					buf->user.nHighPart = ss->current_count - 1;
					buf->user.nLowPart = punit->sync->v_cur_pkt_pts1;
					buf->offset = 0;
#endif
					ret = punit->video_write_packet(&vcfg, GET_CLICE_PURE_DATA, &nalstate,
							buf->haddr + buf->offset, &buf->size, load_buf->haddr, read_size);
					remain_size -= read_size;
					cur_packet_size += read_size;
					push_video_buf(punit, buf);
				}
			}
			first_enter = 1;
		}
	}
	/* parse audio */
audio:

	/* even calls belong to the video track only */
	if (!(switch_flag % 2))
		goto exit;

	if (punit->session_audio != NULL) {
		if (punit->session_audio->state != SSTATE_RUNNING_IDLE) {
			goto exit;
		}
	} else {
		goto exit;
	}

	/* read audio packet data, parse packet and put to audio module */
	punit->current_stream = &punit->audio_stream;
	read_size = packet_read(punit, PREVIEW_PACKET, 8, &pos);
	total_packet_size = read_size;
	if (read_size == 0) { // the stream end
		ret = SessionCommand(punit->session_audio, SSCMD_STREAM_STOP, NULL);
		punit->session_audio = NULL;
		if (punit->session_video != NULL) {
			ret = SessionCommand(punit->session_video, SSCMD_STREAM_STOP, NULL);
			punit->session_video = NULL;
		}
		goto exit;
	}
	if (read_size + ADTS_HEADER_SIZE <= block_len) {
		if (!(buf = alloc_audio_buf(punit)))
			goto exit;
		read_size = packet_read(punit, READ_PACKET, read_size, &pos);
		aacconfig_t acfg = {0};
		acfg.sample_rate = punit->audio_stream.audio_conf.sample_rate;
		acfg.sample_size = punit->audio_stream.audio_conf.sample_size;
		acfg.channel = punit->audio_stream.audio_conf.channel_count;
#ifdef MEDIA_AV_SYNC
		buf->flags |= SYS_BUF_FLAG_TOP_PRESENT;
		buf->user.nHighPart = ass->current_count - 1;
		buf->user.nLowPart = punit->sync->a_cur_pkt_dts;
		buf->offset = 0;
#endif
		ret = punit->audio_write_packet(&acfg, GET_PACKET_DATA, &nalstate,
				buf->haddr + buf->offset, &buf->size, load_buf->haddr, read_size);
		push_audio_buf(punit, buf);
	} else {
		// !!!: if the audio packet is large than buffer, add codes
		debug("the audio packet is large than buffer, plese add codes\n");
	}

exit:
	/* take the size first: the buffer must not be touched once it is freed */
	loaded_size = load_buf->size;
	if (load_buf != punit->load_buf)
		sysbuf_free(load_buf);

	return loaded_size;
}

/*
 * Run one step of the mp4 session: load the next packet and run the child
 * video/audio sessions.
 *
 * Returns SSTATE_NULL when the session or its handle is missing, otherwise
 * the session state. The state becomes SSTATE_STOP once both child sessions
 * report SSTATE_STOP after end of file, or when no child session is left.
 */
static
SESSIONSTATE SESSIONAPI(SessionRun)(struct SESSION *session)
{
	int bstop = 0;
	SESSIONSTATE state;
	session_unit_t *punit = NULL;

	if (session != NULL)
		punit = (session_unit_t*)session->handle;
	if (punit == NULL)
		return SSTATE_NULL;

#ifdef MEDIA_AV_SYNC
	/* punit->sync is only set up by SSCMD_STREAM_START; it is NULL before that */
	if (punit->sync != NULL)
		media_flush_time(&punit->sync->sys_time, NULL);
#endif

	if(session->state != SSTATE_RUNNING
		&& session->state != SSTATE_RUNNING_IDLE)
		return session->state;

	SESSIONAPI(StreamLoad)(punit);

	/* bit 0: video stopped after eof, bit 1: audio stopped after eof */
	if (punit->session_video != NULL) {
		state = punit->session_video->SessionRun(punit->session_video);
		if (punit->eof && state == SSTATE_STOP) {
			bstop |= 1;
		}
	}

	if (punit->session_audio != NULL) {
		state = punit->session_audio->SessionRun(punit->session_audio);
		if (punit->eof && state == SSTATE_STOP) {
			bstop |= 2;
		}
	}

	if ((bstop & 3) == 3) {
		debug("\tSession %s set to SSTATE_STOP\n", session->name);
		session->state = SSTATE_STOP;
	}
	/* StreamLoad() detaches both child sessions when a track ends */
	if (!punit->session_audio && !punit->session_video)
		session->state = SSTATE_STOP;

	return session->state;
}

/*!
 * \brief open an mp4 file, parse its indexes and start the child sessions
 *
 * The session unit is reset, the file is opened, the index buffer is
 * allocated and split between the sample tables of both tracks, the file is
 * probed and parsed, the packet writers are selected from the stream types
 * and finally the video and audio child sessions are started.
 *
 * \param[in]     session	the mp4 session, at least in state SSTATE_INITED
 * \param[in,out] file		file name and stream types; the audio parameters
 *				are written back after parsing
 *
 * \return 0 on success; -EPERM when the session is not usable or the file
 *         cannot be opened, -EINVAL when file is NULL, -ENOMEM when the index
 *         buffer cannot be allocated, -EBADF when the file cannot be parsed,
 *         or the error returned by a child session. On failure everything
 *         acquired here is released again and the session is left in
 *         SSTATE_INITED.
 */
static
int SESSIONAPI(SSCMD_STREAM_START)(struct SESSION *session, session_file_t *file)
{
	int res;
	int file_opened = 0;
	session_file_t fl;
	session_unit_t *punit = NULL;
	if (session != NULL)
		punit = (session_unit_t*)session->handle; // allocated in SessionInit
	if (punit == NULL || session->state < SSTATE_INITED)
		return -EPERM;
	if (file == NULL)
		return -EINVAL;

	/*
	 * NOTE(review): the unit is wiped here, so buffers still held from a run
	 * that was not ended through SSCMD_STREAM_STOP would be lost - confirm
	 * that callers always stop before they start again.
	 */
	memset(punit, 0, sizeof(session_unit_t));
	punit->session = session; // the wipe also cleared the back pointer set in SessionInit
	memset(&fl, 0, sizeof(fl));

	if (file->filename) {
		if (f_open(&punit->file, file->filename, FA_READ) != FR_OK) {
			debug("open stream file error!\n");
			res = -EPERM;
			goto fail;
		}
		file_opened = 1;
		punit->load_size = f_size(&punit->file);
		debug("stream file size is %ld\n", punit->load_size);
	}

	punit->readbuf = malloc(MP4_BUFLEN);
	if (!punit->readbuf) {
		debug("mp4 readbuf malloc fail!\n");
		res = -ENOMEM;
		goto fail;
	}
#ifndef CONFIG_MEDIA_EMULATE_ON_PC
	xthal_set_region_attribute((void *)(punit->readbuf), MP4_BUFLEN, XCHAL_CA_BYPASS, 0);
#endif
	/* Initial loading: one MP4_INDEX_BUFLEN slot per sample table */
	punit->audio_stream.chunk_offset.bufaddr = punit->readbuf; // stco or co64
	punit->audio_stream.chunk_index.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN; // stsc
	punit->audio_stream.sample_size.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 2; // stsz
	punit->audio_stream.decode_time.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 3; // stts
	punit->audio_stream.composition_time.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 4; // ctts
	punit->audio_stream.sync_sample.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 5; // stss

	punit->video_stream.chunk_offset.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 6; // stco or co64
	punit->video_stream.chunk_index.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 7; // stsc
	punit->video_stream.sample_size.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 8; // stsz
	punit->video_stream.decode_time.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 9; // stts
	punit->video_stream.composition_time.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 10; // ctts
	punit->video_stream.sync_sample.bufaddr = punit->readbuf + MP4_INDEX_BUFLEN * 11; // stss

	punit->audio_stream.audio_conf.channel_count = file->ta.channel;
	punit->audio_stream.audio_conf.sample_rate = file->ta.sample_rate;
	punit->audio_stream.audio_conf.sample_size = file->ta.sample_size;

	/* Probe mp4 format type */
	if (mp4_probe(punit)) {
		debug("mp4 file probe error!\n");
		res = -EBADF;
		goto fail;
	}

	/* Get stream info from format file */
	if (mp4_streaminfo_get(punit)) {
		debug("get stream info from mp4 file error!\n");
		res = -EBADF;
		goto fail;
	}
#ifdef CONFIG_MEDIA_EMULATE_ON_PC
	file->tv.width = punit->video_stream.track_header.width;
	file->tv.height = punit->video_stream.track_header.height;
#endif

	/* Assignment function */
	if (file->tv.type == SST_H264) {
		punit->video_write_packet = media_bsf_h264toannexb;
	} else if (file->tv.type == SST_HEVC) {
		punit->video_write_packet = media_bsf_hevctoannexb;
	} else if (file->tv.type == SST_MPG2) {
		punit->video_write_packet = media_io_write_packet;
	} else if (file->tv.type == SST_MPG4) {
		punit->video_write_packet = media_mpeg4_to_m4v; //TODO: modify, add mp4v/esds tag parse
	} else {
		punit->video_write_packet = media_io_write_packet;
		debug("video type: %d not support currently!\n", file->tv.type);
	}
	if (file->ta.type == SST_MP3) {
		punit->audio_write_packet = media_io_write_packet;
	} else if ((file->ta.type == SST_AAC) || (file->ta.type == SST_AAC_ADTS)) {
		punit->audio_write_packet = media_adts_aac_write_packet;
	} else {
		// other type not support
		punit->audio_write_packet = media_io_write_packet;
		debug("audio type: %d not support currently!\n", file->ta.type);
	}

#ifdef MEDIA_AV_SYNC
	/* Init param */
	punit->sync = &punit->avsync;
	memset(punit->sync, 0, sizeof(session_av_sync)); // add on 20210111, fix the bug of dts error in circle play
	punit->sync->v_last_play_timestamp.last_tick = xthal_get_ccount();
	punit->sync->a_play_buf_flush_timestamp.last_tick = xthal_get_ccount();
	punit->sync->sys_time.last_tick = xthal_get_ccount();
	punit->sync->a_real_rate = 44100; // default value, modify in i2sout module
	punit->sync->v_dts_time_base = punit->video_stream.media_header.time_scale;
	punit->sync->a_dts_time_base = punit->audio_stream.media_header.time_scale;
	/* average packet duration in ms: track duration in seconds * 1000 / packet count */
	punit->sync->v_pkt_duration = ((float)punit->video_stream.media_header.duration / punit->video_stream.media_header.time_scale) * 1000
								/ punit->video_stream.sample_size.total_count;
	debug("video pakcet duration: %fms, ", punit->sync->v_pkt_duration);
	punit->sync->v_real_duration = punit->sync->v_pkt_duration;
	punit->sync->a_pkt_duration = ((float)punit->audio_stream.media_header.duration / punit->audio_stream.media_header.time_scale) * 1000
								/ punit->audio_stream.sample_size.total_count;
	debug("audio packet duration: %fms\n", punit->sync->a_pkt_duration);
#endif

	/* report the audio parameters found in the file back to the caller */
	file->ta.sample_rate = punit->audio_stream.audio_conf.sample_rate;
	file->ta.sample_size = punit->audio_stream.audio_conf.sample_size;
	file->ta.channel = punit->audio_stream.audio_conf.channel_count;

	f_lseek(&punit->file, 0); // rewind to origin
	if (punit->load_buf)
		punit->load_buf->size = 0;

	punit->session_video = SessionGet(file->tv.type);
	if (punit->session_video != NULL) {
		fl.type = file->tv.type;
		fl.filename = NULL;
		fl.tv = file->tv;
#ifdef MEDIA_AV_SYNC
		fl.sync = &punit->avsync;
#endif
		fl.ftype = file->ftype;
		res = SessionCommand(punit->session_video, SSCMD_STREAM_START, &fl);
		if (res) {
			punit->session_video = NULL;
			goto fail;
		}
	}

	punit->session_audio = SessionGet(file->ta.type);
	if (punit->session_audio != NULL) {
		fl.type = file->ta.type;
		fl.filename = NULL;
		fl.ta = file->ta;
#ifdef MEDIA_AV_SYNC
		fl.sync = &punit->avsync;
#endif
		fl.ftype = file->ftype;
		res = SessionCommand(punit->session_audio, SSCMD_STREAM_START, &fl);
		if (res) {
			punit->session_audio = NULL;
			goto fail; // the failure path stops the video session started above
		}
#ifdef MEDIA_AV_SYNC
		punit->sync->v_real_duration = (punit->sync->v_pkt_duration * punit->sync->a_real_rate)
												/ file->ta.sample_rate;
#endif
	}

	SessionBufferDeInit(session);

	session->state = SSTATE_RUNNING;
	debug("\tSession %s set to SSTATE_RUNNING in SSCMD_STREAM_START\n", session->name);

	return 0;

fail:
	/*
	 * Release what was acquired above directly. SSCMD_STREAM_STOP cannot be
	 * used for this: it rejects a session that is not running yet, and its
	 * result would replace the error that is reported here.
	 */
	if (punit->session_video != NULL) {
		SessionCommand(punit->session_video, SSCMD_STREAM_STOP, NULL);
		punit->session_video = NULL;
	}
	if (punit->readbuf) {
		free(punit->readbuf);
		punit->readbuf = NULL;
	}
	if (file_opened)
		f_close(&punit->file);
	session->state = SSTATE_INITED;
	return res;
}

/*
 * Stop playback: release the index buffer and the load buffers, stop and
 * detach both child sessions, close the file and put the session back into
 * SSTATE_INITED.
 *
 * Returns 0 on success, -EPERM when the session has no handle or has not
 * reached SSTATE_RUNNING.
 */
static
int SESSIONAPI(SSCMD_STREAM_STOP)(struct SESSION *session)
{
	session_unit_t *unit;

	if (session == NULL)
		return -EPERM;
	unit = (session_unit_t*)session->handle;
	if (unit == NULL || session->state < SSTATE_RUNNING)
		return -EPERM;

	// set state
	session->state = SSTATE_STOP;
	debug("\tSession %s set to SSTATE_STOP in SSCMD_STREAM_STOP\n", session->name);

	/* index buffer */
	if (unit->readbuf) {
		free(unit->readbuf);
		unit->readbuf = NULL;
	}

	/* demuxer load buffer */
	if (unit->load_buf)
		sysbuf_free(unit->load_buf);
	unit->load_buf = NULL;

	/* video: child session first, then its pending send buffer */
	if (unit->session_video != NULL) {
		(void)SessionCommand(unit->session_video, SSCMD_STREAM_STOP, NULL);
		unit->session_video = NULL;
	}
	if (unit->loadbuf_video)
		sysbuf_free(unit->loadbuf_video);
	unit->loadbuf_video = NULL;

	/* audio: child session first, then its pending send buffer */
	if (unit->session_audio != NULL) {
		(void)SessionCommand(unit->session_audio, SSCMD_STREAM_STOP, NULL);
		unit->session_audio = NULL;
	}
	if (unit->loadbuf_audio)
		sysbuf_free(unit->loadbuf_audio);
	unit->loadbuf_audio = NULL;

#ifdef CONFIG_MEDIA_EMULATE_ON_PC
	if (unit->file)
		f_close(&unit->file);
#else
	f_close(&unit->file);
#endif
	SessionBufferDeInit(session);

	// set state
	session->state = SSTATE_INITED;

	return 0;
}

/*
 * Pause playback: a running session goes to SSTATE_RUNNING_PAUSE and the
 * pause command is forwarded to both child sessions.
 *
 * Returns 0, or -EACCES when the session has no handle.
 */
static
int SESSIONAPI(SSCMD_STREAM_PAUSE)(struct SESSION *session)
{
	session_unit_t *unit;

	if (session == NULL)
		return -EACCES;
	unit = (session_unit_t*)session->handle;
	if (unit == NULL)
		return -EACCES;

	switch (session->state) {
	case SSTATE_RUNNING:
	case SSTATE_RUNNING_IDLE:
		session->state = SSTATE_RUNNING_PAUSE;
		break;
	default:
		break;
	}

	if (unit->session_video != NULL)
		SessionCommand(unit->session_video, SSCMD_STREAM_PAUSE, NULL);
	if (unit->session_audio != NULL)
		SessionCommand(unit->session_audio, SSCMD_STREAM_PAUSE, NULL);

	return 0;
}

/*
 * Resume playback: a paused session goes back to SSTATE_RUNNING and the
 * resume command is forwarded to both child sessions.
 *
 * Returns 0, or -EACCES when the session has no handle.
 */
static
int SESSIONAPI(SSCMD_STREAM_RESUME)(struct SESSION *session)
{
	session_unit_t *unit;

	if (session == NULL)
		return -EACCES;
	unit = (session_unit_t*)session->handle;
	if (unit == NULL)
		return -EACCES;

	if (SSTATE_RUNNING_PAUSE == session->state)
		session->state = SSTATE_RUNNING;

	if (unit->session_video != NULL)
		SessionCommand(unit->session_video, SSCMD_STREAM_RESUME, NULL);
	if (unit->session_audio != NULL)
		SessionCommand(unit->session_audio, SSCMD_STREAM_RESUME, NULL);

	return 0;
}

static
int SESSIONAPI(SSCMD_STREAM_SKIP)(struct SESSION *session, int *skip_time)
{
	session_unit_t *punit = NULL;
	if (session != NULL)
		punit = (session_unit_t*)session->handle;
	if (punit == NULL)
		return -EACCES;

	/* Fast forward or fast rewind, skip unit ms */
	if (punit->session_video != NULL) {
		sync_sample_t *ss = &punit->video_stream.sync_sample;
		/* get packet number want skip */
#ifdef MEDIA_AV_SYNC
		punit->skip_video_key_pkt = (int)((float)*skip_time / punit->sync->v_pkt_duration);
#endif
		punit->skip_video_key_pkt += punit->video_stream.sample_size.current_count;
		if (punit->skip_video_key_pkt < 1)
			punit->skip_video_key_pkt = 1;

		int now, next, last;
		if (ss->current_count <= 0)
			ss->current_count = 1;
		if (ss->current_count >= ss->total_count)
			ss->current_count = ss->total_count - 1;
		while (ss->current_count > 0 && ss->current_count < ss->total_count) {
			if (punit->skip_video_key_pkt >= ss->sample_number)	 {
				now = ss->sample_number;
				update_sync_sample(punit, FORWARD, ss);
				next = ss->sample_number;
				if (punit->skip_video_key_pkt >= now && punit->skip_video_key_pkt < next) {
					punit->skip_video_key_pkt = (*skip_time >= 0 ? next : now);
					debug("@@@@@@@@video skip to key frame[%d]@@@@@@@@\n", punit->skip_video_key_pkt);
					break;
				}
			} else {
				now = ss->sample_number;
				update_sync_sample(punit, REWIND, ss);
				last = ss->sample_number;
				if (punit->skip_video_key_pkt >= last && punit->skip_video_key_pkt < now) {
					punit->skip_video_key_pkt =  (*skip_time >= 0 ? now : last);
					debug("########video skip to key frame[%d]#######\n", punit->skip_video_key_pkt);
					break;
				}
			}
		}
	}

#ifdef MEDIA_AV_SYNC
	if (punit->session_video != NULL) {
		//!!!: get skip_audio_pkt_num use I-Frame dts match with audio packet dts
		punit->skip_audio_pkt_num = punit->skip_video_key_pkt * punit->sync->v_pkt_duration
									/ punit->sync->a_pkt_duration;
	} else {
		punit->skip_audio_pkt_num = (int)((float)*skip_time / punit->sync->a_pkt_duration);
		punit->skip_audio_pkt_num += punit->audio_stream.sample_size.current_count;
	}
	if (punit->skip_audio_pkt_num < 1)
		punit->skip_audio_pkt_num = 1;
	debug("$$$$$$$$audio skip to key frame[%d]$$$$$$$$\n", punit->skip_audio_pkt_num);
#endif

	/* skip the video and audio packets */
	if (punit->session_video != NULL)
		packet_skip(punit, MP4_TYPE_VIDEO, punit->skip_video_key_pkt);
	if (punit->session_audio != NULL)
		packet_skip(punit, MP4_TYPE_AUDIO, punit->skip_audio_pkt_num);
#ifdef MEDIA_AV_SYNC
	punit->sync->skip_flag = MP4_TYPE_VIDEO | MP4_TYPE_AUDIO; // 0x03
#endif

#ifdef MEDIA_SKIP_NOT_HW_RESET
#else
	SessionBufferDeInit(session);

	if (punit->session_video != NULL)
		SessionCommand(punit->session_video, SSCMD_STREAM_SKIP, NULL);
	if (punit->session_audio != NULL)
		SessionCommand(punit->session_audio, SSCMD_STREAM_SKIP, NULL);
#endif

	return 0;
}

/*!
 * \brief get the current play time
 *
 * With MEDIA_AV_SYNC the time is taken from the video timestamp when a video
 * session exists, otherwise from the audio timestamp, and converted from the
 * track time base to ms. Without MEDIA_AV_SYNC, without any child session or
 * with a zero time base the time is reported as 0.
 *
 * \param[in]  session	the mp4 session
 * \param[out] val	receives the play time in ms as uint64_t; may be NULL
 *
 * \return the play time in ms truncated to int, or -EACCES when the session
 *         has no handle
 */
static
int SESSIONAPI(SSCMD_STREAM_GETCURRENTPLAYTIME)(struct SESSION *session, void *val)
{
	uint64_t current_play_time = 0; // unit: ms
	uint64_t *p_cur_time = (uint64_t *)val;
	session_unit_t *punit = NULL;
	if (session != NULL)
		punit = (session_unit_t*)session->handle;
	if (punit == NULL)
		return -EACCES;

#ifdef MEDIA_AV_SYNC
	/* Get current play time, unit ms */
	if (punit->session_video != NULL) {
		if (punit->sync->v_dts_time_base != 0)
			current_play_time = (punit->sync->v_cur_pkt_pts2 * 1000) / punit->sync->v_dts_time_base;
	} else if (punit->session_audio != NULL) {
		//!!!: subtracting the i2sout buffer time
		if (punit->sync->a_dts_time_base != 0)
			current_play_time = (punit->sync->a_cur_pkt_dts * 1000) / punit->sync->a_dts_time_base;
	}
#endif

	if (p_cur_time != NULL)
		*p_cur_time = current_play_time;

	return (int)current_play_time;
}

/*
 * Command entry point of the mp4 session: dispatch cmd to its handler.
 *
 * Returns the handler result; 0 for SSCMD_STREAM_BUFCHANGED and
 * SSCMD_STREAM_GETVALIDINSIZE, which are accepted but do nothing; -EINVAL
 * for an unknown command or when the session or its handle is missing.
 */
static
int SESSIONAPI(SessionCommand)(struct SESSION *session, int cmd, void *params)
{
	if (session == NULL || session->handle == NULL)
		return -EINVAL;

	switch (cmd) {
	case SSCMD_STREAM_START:
		debug("Session '%s' command 'SSCMD_STREAM_START'\n", session->name);
		return SESSIONAPI(SSCMD_STREAM_START)(session, (session_file_t *)params);

	case SSCMD_STREAM_PAUSE:
		debug("Session '%s' command  'SSCMD_STREAM_PAUSE'\n", session->name);
		return SESSIONAPI(SSCMD_STREAM_PAUSE)(session);

	case SSCMD_STREAM_RESUME:
		debug("Session '%s' command  'SSCMD_STREAM_RESUME'\n", session->name);
		return SESSIONAPI(SSCMD_STREAM_RESUME)(session);

	case SSCMD_STREAM_STOP:
		debug("Session '%s' command  'SSCMD_STREAM_STOP'\n", session->name);
		return SESSIONAPI(SSCMD_STREAM_STOP)(session);

	case SSCMD_STREAM_BUFCHANGED:
	case SSCMD_STREAM_GETVALIDINSIZE:
		/* nothing to do for the demuxer */
		return 0;

	case SSCMD_STREAM_SKIP:
		return SESSIONAPI(SSCMD_STREAM_SKIP)(session, (int *)params);

	case SSCMD_STREAM_GETCURRENTPLAYTIME:
		return SESSIONAPI(SSCMD_STREAM_GETCURRENTPLAYTIME)(session, params);

	default:
		debug("Session '%s' command %d unknown !\n", session->name, cmd);
		return -EINVAL;
	}
}

/*
 * Tear down the mp4 session: release the index buffer, the session buffers
 * and the session unit itself, then clear the session handle. A session
 * without a handle only gets its session buffers released.
 */
static
void SESSIONAPI(SessionDeInit)(struct SESSION *session)
{
	session_unit_t *punit;

	if (session == NULL)
		return;
	punit = (session_unit_t *)session->handle;

	debug("SessionDeInit : %s ..\n", session->name);

	/* the handle is NULL when SessionInit failed or DeInit already ran */
	if (punit != NULL && punit->readbuf) {
		free(punit->readbuf);
		punit->readbuf = NULL;
	}

	SessionBufferDeInit(session);

	if (punit != NULL) {
		free(punit);
	}

	session->handle = NULL;
}

/*
 * Create the mp4 session: allocate and clear the session unit, install the
 * run/command/deinit entry points, initialize the session buffers and put
 * the session into SSTATE_INITED.
 *
 * Returns the new session handle, or NULL when session is NULL or the unit
 * cannot be allocated.
 */
static
SESSIONHANDLE SESSIONAPI(SessionInit)(struct SESSION *session)
{
	session_unit_t *punit;

	/* same guard as the other entry points; session is dereferenced below */
	if (session == NULL)
		return NULL;

	punit = (session_unit_t *)malloc(sizeof(session_unit_t) + 64);
	if (punit == NULL)
		return NULL;

	debug("SessionInit : %s ..\n", session->name);

	memset(punit, 0, sizeof(session_unit_t));
	punit->session = session;
	session->SessionRun = &SESSIONAPI(SessionRun);
	session->SessionCommand = &SESSIONAPI(SessionCommand);
	session->SessionDeInit = &SESSIONAPI(SessionDeInit);
	session->state = SSTATE_INITED;

	SessionBufferInit(session);

	session->handle = (SESSIONHANDLE)punit;
	return session->handle;
}

/*
 * Session descriptor of the mp4 demuxer: its name, a bit mask built from
 * SST_MP4 and the init entry point.
 * NOTE(review): the field names of struct SESSION are not visible here; the
 * mask is presumably the set of stream types this session handles - confirm.
 */
struct SESSION gSession_mp4 =
{
	"mp4", (1 << SST_MP4), &SESSIONAPI(SessionInit)
};
